{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "################################################################################\n",
      "### WARNING, path does not exist: KALDI_ROOT=/mnt/matylda5/iveselyk/Tools/kaldi-trunk\n",
      "###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)\n",
      "###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)\n",
      "################################################################################\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import kaldi_io\n",
    "import kaldiio\n",
    "import json\n",
    "import re\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取feats.scp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "feats_map = {}\n",
    "with open(\"/home1/meichaoyang/dataset/magic_aug/data_reverb/feats.scp\", \"r\") as f:\n",
    "    for line in f:\n",
    "        data = line.split()\n",
    "        feats_map[data[0]] = data[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home1/meichaoyang/dataset/magic_aug/data_reverb/_fbank/raw_fbank_data_reverb.1.ark:31'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feats_map[\"reverb1-14_3466_20170826171159\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取corpus.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus_map = {}\n",
    "with open('/home1/meichaoyang/dataset/magic_aug/data_reverb/text_old', 'r') as f:\n",
    "    for line in f:\n",
    "        data = line.split()\n",
    "        corpus_txt = re.sub(r\"([.!?。！，？、 \\[\\],，])\", r\"\", data[1])\n",
    "        corpus_map[data[0]] = corpus_txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "SOS_token = 0\n",
    "EOS_token = 1\n",
    "import re\n",
    "# string = '我要把你卸载掉'\n",
    "# re.findall(r'.{1}', string)\n",
    "\n",
    "class Lang:\n",
    "    def __init__(self, name):\n",
    "        self.name = name\n",
    "        self.word2index = {\"SOS\":0, \"EOS\":1, \"<unk>\":2}\n",
    "        self.word2count = {}\n",
    "        self.index2word = {0: \"SOS\", 1: \"EOS\",2: \"<unk>\"}\n",
    "        self.n_words = 2  # Count SOS and EOS\n",
    "\n",
    "    def addSentence(self, sentence):\n",
    "        for word in re.findall(r'.{1}', sentence):\n",
    "            self.addWord(word)\n",
    "\n",
    "    def addWord(self, word):\n",
    "        if word not in self.word2index:\n",
    "            self.word2index[word] = self.n_words\n",
    "            self.word2count[word] = 1\n",
    "            self.index2word[self.n_words] = word\n",
    "            self.n_words += 1\n",
    "        else:\n",
    "            self.word2count[word] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalizeString(s):\n",
    "    s = s.lower().strip()\n",
    "    s = re.sub(r\"([.!?。！，？、 \\[\\],，])\", r\" \\1\", s)\n",
    "    return s\n",
    "\n",
    "def readLangs(lang1):\n",
    "    print(\"Reading lines...\")\n",
    "    lang = Lang(lang1)\n",
    "\n",
    "    # Read the file and split into lines\n",
    "    lines = open('/home1/meichaoyang/dataset/magic_aug/data_reverb/text_old', \"r\").\\\n",
    "        read().strip().split('\\n')\n",
    "\n",
    "    for line in lines:\n",
    "        s = line.split()\n",
    "        lang.addSentence(normalizeString(s[1]))\n",
    "\n",
    "    return lang"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading lines...\n"
     ]
    }
   ],
   "source": [
    "lang = readLangs(\"ZH\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 准备json数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "json_data = {}\n",
    "utts = json_data[\"utts\"] = {}\n",
    "dic = json_data[\"dic\"] = {}\n",
    "for i in range(lang.n_words):\n",
    "    dic[lang.index2word[i]] = i\n",
    "    lang.index2word[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "厨\n",
      "房\n",
      "用\n",
      "具\n"
     ]
    }
   ],
   "source": [
    "# IC0001W0001\n",
    "for ch in corpus_map[\"IC0001W0001\"]:\n",
    "    print(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = list(feats_map.keys())\n",
    "for utt in keys:\n",
    "    utts[utt] = {}\n",
    "    utts[utt][\"input\"] = {}\n",
    "    utts[utt][\"input\"][\"feat\"] = feats_map[utt]\n",
    "    utts[utt][\"input\"][\"shape\"] = list(kaldi_io.read_mat(feats_map[utt]).shape)\n",
    "    utts[utt][\"output\"] = {}\n",
    "    utts[utt][\"output\"][\"text\"] = corpus_map[utt]\n",
    "    tokenid = [lang.word2index[ch] if ch in lang.word2index else lang.word2index['<unk>']  for ch in corpus_map[utt]]\n",
    "    utts[utt][\"output\"][\"tokenid\"] = tokenid\n",
    "    utts[utt][\"output\"][\"shape\"] = [len(tokenid), lang.n_words]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = kaldi_io.read_mat(feats_map[\"reverb1-14_3466_20170826171159\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(304, 80)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a[:a.shape[0]//4*4].shape\n",
    "# a.shape[0]//4*4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "b = np.array([[1,2,3],[4,5,6]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2],\n",
       "       [4, 5]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b[:,0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lang.word2index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"train_chars.txt\" ,\"w\") as f:\n",
    "    for i in lang.word2index:\n",
    "        f.write(i+\" \"+str(lang.word2index[i])+\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': '请语言播放小说', 'tokenid': [2, 3, 4, 5, 6, 7, 8], 'shape': [7, 4519]}"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "utts[\"reverb1-14_3466_20170826171159\"][\"output\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 写入json文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('data.json', 'w') as f:\n",
    "    json.dump(json_data, f, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取json文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('data.json', 'r') as f:\n",
    "    json_data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "573480"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(json_data[\"utts\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "feats_map = {}\n",
    "with open(\"/home1/meichaoyang/dataset/data_aishell2/feats/test/feats.scp\", \"r\") as f:\n",
    "    for line in f:\n",
    "        data = line.split()\n",
    "        feats_map[data[0]] = data[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus_map = {}\n",
    "with open('/home1/meichaoyang/dataset/data_aishell2/feats/test/text', 'r') as f:\n",
    "    for line in f:\n",
    "        data = line.split()\n",
    "        corpus_txt = re.sub(r\"([.!?。！，？、 \\[\\],，])\", r\"\", data[1])\n",
    "        corpus_map[data[0]] = corpus_txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "json_data = {}\n",
    "utts = json_data[\"utts\"] = {}\n",
    "dic = json_data[\"dic\"] = {}\n",
    "for i in range(lang.n_words):\n",
    "    dic[lang.index2word[i]] = i\n",
    "    lang.index2word[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = list(feats_map.keys())\n",
    "for utt in keys:\n",
    "    utts[utt] = {}\n",
    "    utts[utt][\"input\"] = {}\n",
    "    utts[utt][\"input\"][\"feat\"] = feats_map[utt]\n",
    "    utts[utt][\"input\"][\"shape\"] = list(kaldi_io.read_mat(feats_map[utt]).shape)\n",
    "    utts[utt][\"output\"] = {}\n",
    "    utts[utt][\"output\"][\"text\"] = corpus_map[utt]\n",
    "    tokenid = [lang.word2index[ch] if ch in lang.word2index else lang.word2index['<unk>']  for ch in corpus_map[utt]]\n",
    "    utts[utt][\"output\"][\"tokenid\"] = tokenid\n",
    "    utts[utt][\"output\"][\"shape\"] = [len(tokenid), lang.n_words]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('data_test.json', 'w') as f:\n",
    "    json.dump(json_data, f, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 测试data4.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "from io import open\n",
    "import unicodedata\n",
    "import string\n",
    "import re\n",
    "\n",
    "import torch\n",
    "import torchaudio\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch import nn\n",
    "from torch import optim\n",
    "import torch.nn.functional as F\n",
    "from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence\n",
    "from pathlib import Path\n",
    "import kaldi_io\n",
    "import sys\n",
    "import gc\n",
    "import json\n",
    "import time\n",
    "from data_4 import AudioDataLoader, AudioDataset, pad_list\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "print_use = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_json = \"data.json\"\n",
    "test_json = \"data_test.json\"\n",
    "batch_size = 32\n",
    "maxlen_in = 100000\n",
    "maxlen_out = 30\n",
    "num_workers = 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "te_dataset = AudioDataset(test_json, batch_size,\n",
    "                              maxlen_in, maxlen_out)\n",
    "te_loader = AudioDataLoader(te_dataset, batch_size=1, num_workers=num_workers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : (tensor([[[ 4.8704,  7.6203, 10.4137,  ..., 13.1590, 13.5756, 13.2265],\n",
      "         [ 4.9864,  7.2130,  8.9778,  ..., 13.0554, 13.8832, 13.1171],\n",
      "         [ 3.2858,  6.8057,  9.6691,  ..., 12.8481, 12.9974, 12.5761],\n",
      "         ...,\n",
      "         [ 0.9360,  5.2082,  6.6501,  ..., 12.3645, 11.8783, 12.0713],\n",
      "         [ 1.5080,  3.2681,  5.2221,  ..., 12.3645, 11.9866, 11.5665],\n",
      "         [ 4.1747,  5.6292,  5.7219,  ..., 12.4336, 11.9866, 11.5350]],\n",
      "\n",
      "        [[ 1.1541,  4.0020,  4.9308,  ..., 12.2372, 12.0145, 11.6568],\n",
      "         [ 1.6574,  4.6347,  5.9078,  ..., 12.0907, 12.0145, 11.5204],\n",
      "         [ 3.7213,  5.0948,  6.1032,  ..., 11.8814, 11.9975, 11.7713],\n",
      "         ...,\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],\n",
      "\n",
      "        [[ 3.3843,  6.3341,  7.3461,  ..., 11.7156, 12.2395, 11.6748],\n",
      "         [ 3.1986,  6.3341,  7.1736,  ..., 13.1183, 12.7371, 12.6088],\n",
      "         [ 2.4558,  5.2963,  6.2250,  ..., 12.3444, 11.7419, 11.4550],\n",
      "         ...,\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],\n",
      "\n",
      "        ...,\n",
      "\n",
      "        [[ 2.0581,  4.1015,  5.7586,  ..., 12.5086, 12.0808, 10.4910],\n",
      "         [ 1.9973,  4.4717,  5.0342,  ..., 12.1088, 11.8973, 11.5745],\n",
      "         [ 1.2072,  4.5775,  6.0221,  ..., 11.6893, 11.8441, 11.6466],\n",
      "         ...,\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],\n",
      "\n",
      "        [[ 1.8234,  4.4490,  6.2206,  ..., 12.1579, 11.7644, 11.1933],\n",
      "         [ 1.3167,  4.4490,  6.4860,  ..., 12.3670, 11.8595, 11.8651],\n",
      "         [ 1.0633,  4.6913,  6.8842,  ..., 12.6457, 12.1926, 11.2941],\n",
      "         ...,\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],\n",
      "\n",
      "        [[ 3.8607,  3.8636,  5.9016,  ..., 12.1340, 12.2772, 11.3214],\n",
      "         [ 3.8607,  4.5781,  5.6901,  ..., 12.6333, 12.3683, 11.5110],\n",
      "         [ 3.1219,  5.1339,  6.5363,  ..., 12.1849, 12.6417, 11.8764],\n",
      "         ...,\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
      "         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]]), tensor([1324, 1164, 1144, 1100, 1056, 1056, 1040, 1032, 1028, 1028, 1016, 1012,\n",
      "        1008, 1004, 1000,  984,  980,  964,  960,  956,  956,  952,  952,  952,\n",
      "         952,  948,  944,  932,  932,  932,  928,  928]), tensor([[ 220,   60,  285,  ...,   -1,   -1,   -1],\n",
      "        [ 595, 1150, 2890,  ...,   -1,   -1,   -1],\n",
      "        [ 552,  547,  200,  ...,  929,   -1,   -1],\n",
      "        ...,\n",
      "        [  54,  448,   66,  ...,   -1,   -1,   -1],\n",
      "        [ 401,  911, 1060,  ..., 1204,  268, 1180],\n",
      "        [1144,  347,  872,  ...,   -1,   -1,   -1]]))\n"
     ]
    }
   ],
   "source": [
    "for i, (data) in enumerate(te_loader):\n",
    "    print(str(i)+\" : \"+str(data))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "32\n",
      "32\n",
      "\n",
      "\n",
      "32\n",
      "----------------------------------\n",
      "\n",
      "\n",
      "-----------------2\n",
      "\n",
      "\n",
      "2\n",
      "\n",
      "2\n",
      "32\n",
      "32\n",
      "\n",
      "\n",
      "\n",
      "----------------------------------\n",
      "2\n",
      "\n",
      "2\n",
      "\n",
      "\n",
      "32\n",
      "\n",
      "-----------------\n",
      "2\n",
      "\n",
      "0 : None\n"
     ]
    }
   ],
   "source": [
    "for i, (data) in enumerate(te_loader):\n",
    "    print(str(i)+\" : \"+str(data))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('IC0085W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:193509', 'shape': [926, 40]}, 'output': {'text': '并成功实现了商流信息流资金流与物流的四流合一', 'tokenid': [1073, 309, 1100, 1207, 136, 63, 1510, 149, 369, 997, 149, 1563, 497, 149, 501, 132, 149, 20, 366, 149, 45, 66], 'shape': [22, 4519]}})('IC0094W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:36078', 'shape': [1166, 40]}, 'output': {'text': '王辰澎湃资料八月十八日下午二点二十分左右在运行时梯街突然跳起', 'tokenid': [595, 1150, 2890, 3877, 1563, 1564, 777, 745, 27, 777, 521, 142, 966, 760, 74, 760, 27, 421, 1824, 2063, 51, 625, 137, 83, 1309, 127, 789, 408, 1635, 404], 'shape': [30, 4519]}})\n",
      "\n",
      "('IC0086W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:1905953', 'shape': [814, 40]}, 'output': {'text': '上校阅兵后获批率直升机绕飞仙桃市向家乡致意', 'tokenid': [54, 516, 1360, 586, 114, 1963, 1943, 1593, 1041, 403, 544, 2440, 406, 1781, 28, 22, 543, 417, 495, 1054, 439], 'shape': [21, 4519]}})('ID0023W0422', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:4555552', 'shape': [830, 40]}, 'output': {'text': '位于冕宁县灵山景区有七名宿营大学生因突发暴雨被困', 'tokenid': [57, 1398, 2973, 1069, 307, 1648, 523, 266, 396, 107, 748, 444, 1283, 1942, 217, 238, 25, 377, 789, 570, 513, 218, 392, 1468], 'shape': [24, 4519]}})('IC0007W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6598833', 'shape': [848, 40]}, 'output': {'text': '有天坐巴士去赤柱以及去愉景湾参加朋友婚礼', 'tokenid': [107, 78, 128, 763, 788, 125, 533, 3101, 48, 435, 125, 1464, 266, 1917, 1326, 285, 477, 478, 838, 1411], 'shape': [20, 4519]}})\n",
      "\n",
      "\n",
      "('IC0009W0360', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3432159', 'shape': [878, 40]}, 'output': {'text': '首先由带有浓浓山海关特色的项目一古城定向开场', 'tokenid': [207, 564, 1092, 494, 107, 2702, 2702, 523, 448, 179, 336, 724, 20, 2447, 723, 66, 1128, 480, 87, 543, 95, 545], 'shape': [22, 4519]}})\n",
      "0 : None\n"
     ]
    }
   ],
   "source": [
    "for i, (data) in enumerate(te_loader):\n",
    "    print(str(i)+\" : \"+str(data))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0096W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:5912144', 'shape': [1325, 40]}, 'output': {'text': '就是加入了P点对点对等网络传输的更新方式', 'tokenid': [220, 60, 285, 346, 63, '<unk>', 74, 935, 74, 935, 537, 651, 652, 664, 1746, 20, 557, 184, 292, 821], 'shape': [20, 4519]}}), ('IC0094W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:36078', 'shape': [1166, 40]}, 'output': {'text': '王辰澎湃资料八月十八日下午二点二十分左右在运行时梯街突然跳起', 'tokenid': [595, 1150, 2890, 3877, 1563, 1564, 777, 745, 27, 777, 521, 142, 966, 760, 74, 760, 27, 421, 1824, 2063, 51, 625, 137, 83, 1309, 127, 789, 408, 1635, 404], 'shape': [30, 4519]}}), ('ID0042W0471', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6090243', 'shape': [1146, 40]}, 'output': {'text': '但作为深圳国资委旗下最大的房产房地产但作为深圳国资委旗下最大的房地产公司', 'tokenid': [552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 1300, 1362, 52, 1300, 552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 52, 1300, 172, 929], 'shape': [36, 4519]}}), ('IC0007W0369', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5394213', 'shape': [1101, 40]}, 'output': {'text': '他的主要对手是美国是田径锦赛锦标赛的金牌得主乔科斯瓦斯', 'tokenid': [194, 20, 824, 99, 935, 241, 60, 567, 468, 60, 484, 2219, 1298, 1180, 1298, 55, 1180, 20, 497, 1252, 471, 824, 624, 1523, 1061, 914, 1061], 'shape': [27, 4519]}}), ('IC0080W0192', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9525870', 'shape': [1059, 40]}, 'output': {'text': '积极稳妥探索水利建设贷款等涉农贷款资产证券化试点', 'tokenid': [1621, 1089, 1737, 2257, 1503, 16, 98, 526, 917, 683, 2975, 104, 537, 1743, 1065, 2975, 104, 1563, 1300, 859, 952, 1351, 930, 74], 'shape': [24, 4519]}}), ('IC0003W0027', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8932830', 'shape': [1057, 40]}, 'output': {'text': '万达广场中共北京市石景山区委员会东', 'tokenid': [1569, 752, 365, 545, 347, 503, 224, 360, 22, 912, 266, 523, 396, 1721, 994, 174, 359], 'shape': [17, 4519]}}), ('IC0003W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9076930', 'shape': [1043, 40]}, 'output': {'text': '五种高价中药材价格普降超两成一半品种都在下跌', 'tokenid': [401, 568, 246, 399, 347, 876, 1605, 399, 400, 971, 591, 21, 505, 309, 66, 1170, 490, 568, 173, 51, 142, 2462], 'shape': [22, 4519]}}), ('IC0095W0392', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:7196508', 'shape': [1032, 40]}, 'output': {'text': '拿过上海市五一杯长跑比赛冠军杨浦区迎春长跑冠军', 'tokenid': [933, 420, 54, 448, 22, 401, 66, 1638, 176, 1021, 268, 1180, 1462, 773, 337, 2013, 396, 2342, 647, 176, 1021, 1462, 773], 'shape': [23, 4519]}}), ('IC0003W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8547324', 'shape': [1029, 40]}, 'output': {'text': '五村官私分七二一救灾款获刑获刑两年半至三年', 'tokenid': [401, 496, 1234, 1589, 421, 748, 760, 66, 1347, 3517, 104, 1963, 1233, 1963, 1233, 505, 303, 1170, 668, 24, 303], 'shape': [21, 4519]}}), ('IC0080W0194', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9594096', 'shape': [1029, 40]}, 'output': {'text': '建立金融支持水利改革发展的风险分散和政策保障机制', 'tokenid': [917, 1369, 497, 2478, 1168, 1485, 98, 526, 630, 1592, 570, 2053, 20, 145, 629, 421, 1379, 232, 2203, 2730, 628, 1718, 544, 278], 'shape': [24, 4519]}}), ('ID0043W0445', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5565524', 'shape': [1016, 40]}, 'output': {'text': '三盗墓者酒后捅伤的哥一人背部文有钟馗图案', 'tokenid': [24, 1765, 2016, 281, 871, 114, 3525, 209, 20, 891, 66, 72, 265, 1081, 772, 107, 1014, 4268, 53, 566], 'shape': [20, 4519]}}), ('ID0021W0389', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.30.ark:3999585', 'shape': [1012, 40]}, 'output': {'text': '二零一四财年收入下降四点百分之八至二二七八百六五十二亿日元', 'tokenid': [760, 783, 66, 366, 1715, 303, 131, 346, 142, 591, 366, 74, 988, 421, 339, 777, 668, 760, 760, 748, 777, 988, 283, 401, 27, 760, 2721, 521, 1323], 'shape': [29, 4519]}}), ('IC0003W0352', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:6187541', 'shape': [1009, 40]}, 'output': {'text': '它们分别是布达佩斯汉堡洛杉矶巴黎和罗马', 'tokenid': [262, 198, 421, 279, 60, 985, 752, 1376, 1061, 355, 2648, 273, 2015, 3882, 763, 1228, 232, 1127, 643], 'shape': [19, 4519]}}), ('IC0007W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5268808', 'shape': [1005, 40]}, 'output': {'text': '二零一三年世锦赛铜牌的得主卢克斯梅里赫也不容忽视', 'tokenid': [760, 783, 66, 24, 303, 26, 1298, 1180, 1059, 1252, 20, 471, 824, 1138, 317, 1061, 835, 10, 2404, 229, 42, 603, 1923, 93], 'shape': [24, 4519]}}), ('IC0006W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:7056553', 'shape': [1001, 40]}, 'output': {'text': '六十九岁独腿环卫工拄拐杖扫大街七年半小时扫三百米', 'tokenid': [283, 27, 419, 796, 335, 1995, 402, 823, 776, '<unk>', 2424, 3955, 1363, 217, 127, 748, 303, 1170, 7, 83, 1363, 24, 988, 447], 'shape': [24, 4519]}}), ('ID0043W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5971158', 'shape': [985, 40]}, 'output': {'text': '香港航空HX三零四飞往北京的航班延误九小时', 'tokenid': [1113, 1135, 86, 264, '<unk>', '<unk>', 24, 783, 366, 406, 1675, 224, 360, 20, 86, 143, 1158, 1159, 419, 7, 83], 'shape': [21, 4519]}}), ('IC0096W0323', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6414270', 'shape': [980, 40]}, 'output': {'text': '每二千辆电动汽车至少配套建设一座公交充电站', 'tokenid': [469, 760, 1242, 1604, 30, 323, 1306, 129, 668, 148, 1460, 1866, 917, 683, 66, 69, 172, 81, 411, 30, 774], 'shape': [21, 4519]}}), ('IC0009W0387', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3931850', 'shape': [966, 40]}, 'output': {'text': '山海关区旅游局山海关区体育局以及各相关单位承办', 'tokenid': [523, 448, 179, 396, 1571, 661, 1457, 523, 448, 179, 396, 451, 1184, 1457, 48, 435, 1115, 831, 179, 159, 57, 1121, 165], 'shape': [23, 4519]}}), ('IC0007W0478', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7739130', 'shape': [962, 40]}, 'output': {'text': '六岁女孩肺功能衰竭如八旬老人病因成谜呼气吃力', 'tokenid': [283, 796, 766, 866, 1588, 1100, 177, 2870, 2700, 236, 777, 2840, 282, 72, 843, 377, 309, 3038, 1110, 79, 853, 270], 'shape': [22, 4519]}}), ('IC0002W0363', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5831491', 'shape': [959, 40]}, 'output': {'text': '精工钢构亚厦股份杭钢股份等值得关注', 'tokenid': [1598, 776, 440, 1594, 527, 1846, 950, 858, 580, 440, 950, 858, 537, 883, 471, 179, 438], 'shape': [17, 4519]}}), ('ID0043W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3286257', 'shape': [959, 40]}, 'output': {'text': '九亿五千九百七十四万两千四百零五点一三', 'tokenid': [419, 2721, 401, 1242, 419, 988, 748, 27, 366, 1569, 505, 1242, 366, 988, 783, 401, 74, 66, 24], 'shape': [19, 4519]}}), ('IC0009W0414', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4539381', 'shape': [955, 40]}, 'output': {'text': '搜狐娱乐讯刘嘉玲日前戴上半亿珠宝现身北京出席活动', 'tokenid': [15, 2159, 183, 157, 1992, 583, 1108, 1924, 521, 931, 1849, 54, 1170, 2721, 1791, 778, 136, 701, 224, 360, 56, 2028, 1584, 323], 'shape': [24, 4519]}}), ('ID0041W0470', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:6703502', 'shape': [955, 40]}, 'output': {'text': '而六月居冠的天河北成交宗数下近百分之二', 'tokenid': [1044, 283, 745, 1263, 1462, 20, 78, 223, 224, 309, 81, 597, 1131, 142, 19, 988, 421, 339, 760], 'shape': [19, 4519]}}), ('IC0009W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5629083', 'shape': [954, 40]}, 'output': {'text': '六十五岁的通许县人潘安在郑州市做学生课桌椅加工生意', 'tokenid': [283, 27, 401, 796, 20, 178, 540, 307, 72, 2111, 720, 51, 201, 226, 22, 263, 238, 25, 353, 1284, 1975, 285, 776, 25, 439], 'shape': [25, 4519]}}), ('IC0007W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7463127', 'shape': [952, 40]}, 'output': {'text': '他们结伴去位于觐州奉化交界的卖柴岳徒步', 'tokenid': [194, 198, 893, 409, 125, 57, 1398, '<unk>', 226, 2944, 1351, 81, 1125, 20, 397, 1118, 1501, 1905, 345], 'shape': [19, 4519]}}), ('IC0096W0122', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2248237', 'shape': [950, 40]}, 'output': {'text': '不仅是北京上海广州等一线城市地块溢价率在走高', 'tokenid': [42, 1891, 60, 224, 360, 54, 448, 365, 226, 537, 66, 284, 480, 22, 52, 907, 1989, 399, 1593, 51, 520, 246], 'shape': [22, 4519]}}), ('ID0046W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5373814', 'shape': [947, 40]}, 'output': {'text': '自自己只是把二传手们喜欢的两次吊球变成两次进攻而已', 'tokenid': [275, 275, 276, 96, 60, 92, 760, 664, 241, 198, 88, 89, 20, 505, 1204, 3191, 1039, 730, 309, 505, 1204, 1566, 845, 1044, 1197], 'shape': [25, 4519]}}), ('IC0009W0166', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8596778', 'shape': [934, 40]}, 'output': {'text': '同比增长百分之二十一点七净资产均值为二百零二点三亿元', 'tokenid': [426, 268, 1836, 176, 988, 421, 339, 760, 27, 66, 74, 748, 582, 1563, 1300, 671, 883, 200, 760, 988, 783, 760, 74, 24, 2721, 1323], 'shape': [26, 4519]}}), ('IC0009W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4272465', 'shape': [934, 40]}, 'output': {'text': '以色列选手米宁科十四米七八创造国家纪录摘得银牌', 'tokenid': [48, 724, 429, 687, 241, 447, 1069, 1523, 27, 366, 447, 748, 777, 1299, 1855, 468, 417, 1694, 290, 3220, 471, 659, 1252], 'shape': [23, 4519]}}), ('IC0087W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:2815299', 'shape': [934, 40]}, 'output': {'text': '上海一大巴司机俯身捡手机致翻车六名乘客死亡', 'tokenid': [54, 448, 66, 217, 763, 929, 544, '<unk>', 701, 3062, 241, 544, 1054, 1815, 129, 283, 444, 1771, 1364, 502, 2024], 'shape': [21, 4519]}}), ('IC0089W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.18.ark:7895628', 'shape': [931, 40]}, 'output': {'text': '五组队员以二十四小时总跑量二万七千九百七十五点五三的成绩完美的结束了此次比赛', 'tokenid': [401, 911, 1060, 994, 48, 760, 27, 366, 7, 83, 878, 1021, 150, 760, 1569, 748, 1242, 419, 988, 748, 27, 401, 74, 401, 24, 20, 309, 1581, 37, 567, 20, 893, 894, 63, 1099, 1204, 268, 1180], 'shape': [38, 4519]}}), ('IC0002W0133', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1397741', 'shape': [930, 40]}, 'output': {'text': '其中包括对拥有一套住房并已结清相应购房贷款的家庭', 'tokenid': [1144, 347, 872, 1052, 935, 1825, 107, 66, 1866, 381, 1362, 1073, 1197, 893, 768, 831, 677, 111, 1362, 2975, 104, 20, 417, 1770], 'shape': [24, 4519]}})]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0007W0264', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:3152908', 'shape': [927, 40]}, 'output': {'text': 'CX六百采用的也并非是FIREFOX操作系统', 'tokenid': ['<unk>', '<unk>', 283, 988, 2394, 261, 20, 229, 1073, 959, 60, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 536, 547, 1009, 1072], 'shape': [22, 4519]}}), ('IC0085W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:193509', 'shape': [926, 40]}, 'output': {'text': '并成功实现了商流信息流资金流与物流的四流合一', 'tokenid': [1073, 309, 1100, 1207, 136, 63, 1510, 149, 369, 997, 149, 1563, 497, 149, 501, 132, 149, 20, 366, 149, 45, 66], 'shape': [22, 4519]}}), ('IC0082W0021', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.11.ark:5199439', 'shape': [924, 40]}, 'output': {'text': '幺九四九THEHIDDENCCITY三里屯店', 'tokenid': [1844, 419, 366, 419, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 24, 10, 2265, 106], 'shape': [22, 4519]}}), ('ID0042W0179', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:1680666', 'shape': [922, 40]}, 'output': {'text': '烦请把空调温度调到调大到二十六度烦请把空调温度调大到二十六度', 'tokenid': [349, 2, 92, 264, 160, 161, 139, 160, 75, 160, 217, 75, 760, 27, 283, 139, 349, 2, 92, 264, 160, 161, 139, 160, 217, 75, 760, 27, 283, 139], 'shape': [30, 4519]}}), ('IC0087W0350', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:312919', 'shape': [921, 40]}, 'output': {'text': '昆仑决周口站闫西波剑指帽子戏法搜狐体育', 'tokenid': [1773, 2007, 1366, 227, 944, 774, 304, 873, 1735, 1415, 134, 1040, 393, 662, 187, 15, 2159, 451, 1184], 'shape': [19, 4519]}}), ('IC0002W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:4645365', 'shape': [920, 40]}, 'output': {'text': '从智能手表到带嵌入式电子的健身手镯珠宝和衣服', 'tokenid': [371, 1768, 177, 241, 1116, 75, 494, 3939, 346, 821, 30, 393, 20, 563, 701, 241, 3698, 1791, 778, 232, 40, 41], 'shape': [22, 4519]}}), ('IC0080W0357', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3053894', 'shape': [918, 40]}, 'output': {'text': '不管是新科世锦冠军马龙还是直板第一人许昕', 'tokenid': [42, 897, 60, 184, 1523, 26, 1298, 1462, 773, 643, 1062, 146, 60, 1041, 1317, 254, 66, 72, 540, 2087], 'shape': [20, 4519]}}), ('IC0007W0247', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2759707', 'shape': [914, 40]}, 'output': {'text': '年底前全面实施儿童白血病等八个病种的大病保障', 'tokenid': [303, 1195, 931, 11, 216, 1207, 1178, 249, 1068, 1042, 1214, 843, 537, 777, 39, 843, 568, 20, 217, 843, 628, 1718], 'shape': [22, 4519]}}), ('IC0008W0241', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:2079482', 'shape': [914, 40]}, 'output': {'text': '将推动保险资产向更长久期和更低风险资产配置过渡', 'tokenid': [1611, 64, 323, 628, 629, 1563, 1300, 543, 557, 176, 1024, 196, 232, 557, 991, 145, 629, 1563, 1300, 1460, 58, 420, 1522], 'shape': [23, 4519]}}), ('ID0047W0515', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:7111197', 'shape': [914, 40]}, 'output': {'text': '新股民亏四零万很闹心吃饭时心不在焉被枣核卡喉', 'tokenid': [184, 950, 12, 2802, 366, 783, 1569, 244, 1013, 163, 853, 852, 83, 163, 42, 51, 3200, 392, 2610, 1562, 1007, 2359], 'shape': [22, 4519]}}), ('IC0003W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:2391636', 'shape': [912, 40]}, 'output': {'text': '房企跨界触网营销等实质上都是在打抢客户大战', 'tokenid': [1362, 3202, 1659, 1125, 2534, 651, 1942, 977, 537, 1207, 1926, 54, 173, 60, 51, 94, 2037, 1364, 168, 217, 622], 'shape': [21, 4519]}}), ('IC0003W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8453225', 'shape': [908, 40]}, 'output': {'text': '皇姑区人民法院公布了这起民事纠纷案件的判决结果', 'tokenid': [1456, 710, 396, 72, 12, 187, 515, 172, 985, 63, 9, 404, 12, 363, 2899, 2771, 566, 1105, 20, 1786, 1366, 893, 237], 'shape': [23, 4519]}}), ('IC0002W0412', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:6772348', 'shape': [904, 40]}, 'output': {'text': '她透露日前以大使身份到四川探访眼疾病童四日', 'tokenid': [886, 1554, 1491, 521, 931, 48, 217, 654, 701, 858, 75, 366, 1334, 1503, 2386, 256, 3402, 843, 1068, 366, 521], 'shape': [21, 4519]}}), ('IC0096W0376', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7521059', 'shape': [902, 40]}, 'output': {'text': '上海静安队的朱莹洁首先在跪射阶段处于落后状态', 'tokenid': [54, 448, 828, 720, 1060, 20, 1615, 1652, 1532, 207, 564, 51, 491, 2198, 1164, 518, 665, 1398, 717, 114, 1776, 679], 'shape': [22, 4519]}}), ('IC0009W0458', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5391553', 'shape': [900, 40]}, 'output': {'text': '六旬男给九零后小三买车房女孩与人暧昧被逼写承诺书', 'tokenid': [283, 2840, 995, 70, 419, 783, 114, 7, 24, 616, 129, 1362, 766, 866, 501, 72, 2249, 2250, 392, 2357, 861, 1121, 1122, 697], 'shape': [24, 4519]}}), ('IC0002W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7683440', 'shape': [899, 40]}, 'output': {'text': '妈妈带着鹏鹏前往顺义区首儿李桥儿童医院口腔科看病', 'tokenid': [939, 939, 494, 341, 2262, 2262, 931, 1675, 1050, 1399, 396, 207, 249, 414, 1293, 249, 1068, 841, 515, 944, 2490, 1523, 121, 843], 'shape': [24, 4519]}}), ('IC0085W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2714460', 'shape': [899, 40]}, 'output': {'text': '三门峡市湖滨区会兴镇东坡村段黄河河岸突然塌陷', 'tokenid': [24, 192, 1528, 22, 464, 2193, 396, 174, 1270, 819, 359, 1853, 496, 518, 489, 223, 223, 1763, 789, 408, 3589, 3354], 'shape': [22, 4519]}}), ('ID0047W0396', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4697510', 'shape': [898, 40]}, 'output': {'text': '欣赏到了奇幻魔术秀面部彩绘等精彩节目', 'tokenid': [689, 690, 75, 63, 895, 465, 713, 1032, 984, 216, 1081, 1599, 3297, 537, 1598, 1599, 433, 723], 'shape': [18, 4519]}}), ('IC0007W0373', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5497905', 'shape': [897, 40]}, 'output': {'text': '他在摩纳哥站投出了两二十二秒五六的个人最好成绩', 'tokenid': [194, 51, 1999, 2782, 891, 774, 49, 56, 63, 505, 760, 27, 760, 1613, 401, 283, 20, 39, 72, 208, 120, 309, 1581], 'shape': [23, 4519]}}), ('IC0007W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2066192', 'shape': [895, 40]}, 'output': {'text': '符合医保定点相关规定的非公立医疗机构才可以申请', 'tokenid': [1346, 45, 841, 628, 87, 74, 831, 179, 1769, 87, 20, 959, 172, 1369, 841, 3167, 544, 1594, 673, 47, 48, 2202, 2], 'shape': [23, 4519]}}), ('IC0007W0493', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:8098425', 'shape': [892, 40]}, 'output': {'text': '六岁娃处女膜破裂学校录像离奇被剪疑遭校长猥琐', 'tokenid': [283, 796, 1549, 665, 766, 2785, 1319, 1506, 238, 516, 290, 117, 899, 895, 392, 1728, 1258, 2600, 516, 176, 2079, 2080], 'shape': [22, 4519]}}), ('IC0008W0393', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4872258', 'shape': [892, 40]}, 'output': {'text': '沙特阿拉伯选手马马斯拉赫以四十三秒九三预获得预赛第一', 'tokenid': [765, 336, 373, 188, 1375, 687, 241, 643, 643, 1061, 188, 2404, 48, 366, 27, 24, 1613, 419, 24, 32, 1963, 471, 32, 1180, 254, 66], 'shape': [26, 4519]}}), ('IC0085W0305', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:157476', 'shape': [892, 40]}, 'output': {'text': '信E付将面向全行业用户提供上下游供应链服务新模式', 'tokenid': [369, '<unk>', 103, 1611, 216, 543, 11, 137, 575, 261, 168, 239, 2214, 54, 142, 661, 2214, 677, 1186, 41, 1018, 184, 820, 821], 'shape': [24, 4519]}}), ('ID0049W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.48.ark:4718586', 'shape': [891, 40]}, 'output': {'text': '郭喜闯Vs叶夫根叶夫根尼沃龙科夫罗斯', 'tokenid': [1126, 88, 1247, '<unk>', 702, 797, 1066, 219, 797, 1066, 219, 1608, 2796, 1062, 1523, 1066, 1127, 1061], 'shape': [18, 4519]}}), ('ID0043W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5606517', 'shape': [887, 40]}, 'output': {'text': '白马塔大桥是敦煌市确定的十大重点工程之一', 'tokenid': [1042, 643, 1930, 217, 1293, 60, 1785, 2049, 22, 1325, 87, 20, 27, 217, 573, 74, 776, 1590, 339, 66], 'shape': [20, 4519]}}), ('IC0098W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.27.ark:1824808', 'shape': [886, 40]}, 'output': {'text': '但对于六月份的成交数据也有一定刺激开发商掌握楼市五一和十一黄金周两个节点', 'tokenid': [552, 935, 1398, 283, 745, 858, 20, 309, 81, 1131, 1775, 229, 107, 66, 87, 1142, 793, 95, 570, 1510, 2073, 2296, 1294, 22, 401, 66, 232, 27, 66, 489, 497, 227, 505, 39, 433, 74], 'shape': [36, 4519]}}), ('IC0007W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:333006', 'shape': [885, 40]}, 'output': {'text': '而在今年十月底雅戈尔MAYOR馆杭州开业时', 'tokenid': [1044, 51, 267, 303, 27, 745, 1195, 498, 2812, 529, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 762, 580, 226, 95, 575, 83], 'shape': [21, 4519]}}), ('IC0095W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:8676976', 'shape': [885, 40]}, 'output': {'text': '上海宝马拖拽交警致死案司机翻供否认故意伤害', 'tokenid': [54, 448, 778, 643, 2179, 3532, 81, 949, 1054, 502, 566, 929, 544, 1815, 2214, 61, 199, 362, 439, 209, 1119], 'shape': [21, 4519]}}), ('IC0003W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7869014', 'shape': [882, 40]}, 'output': {'text': '五岁男童被罚喝二十杯水致肾积水回应只喝十次', 'tokenid': [401, 796, 995, 1068, 392, 2030, 870, 760, 27, 1638, 98, 1054, 3081, 1621, 98, 511, 677, 96, 870, 27, 1204], 'shape': [21, 4519]}}), ('IC0083W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:4534489', 'shape': [882, 40]}, 'output': {'text': '三轮车被拦截突然加速协警遭拖行数米多处受伤', 'tokenid': [24, 1030, 129, 392, 2330, 2875, 789, 408, 285, 62, 2369, 949, 2600, 2179, 137, 1131, 447, 147, 665, 504, 209], 'shape': [21, 4519]}}), ('ID0028W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.37.ark:4427378', 'shape': [882, 40]}, 'output': {'text': '本部长藤本松下将B', 'tokenid': [105, 1081, 176, 2167, 105, 354, 142, 1611, '<unk>'], 'shape': [9, 4519]}}), ('IC0009W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:6100369', 'shape': [879, 40]}, 'output': {'text': '今年六十来岁的王某化名是蚌埠市某公司管理人员', 'tokenid': [267, 303, 283, 27, 154, 796, 20, 595, 71, 1351, 444, 60, 3075, 3076, 22, 71, 172, 929, 897, 666, 72, 994], 'shape': [22, 4519]}})]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('ID0041W0413', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5453061', 'shape': [849, 40]}, 'output': {'text': '中海集运二零一一年全年净污损达七亿元', 'tokenid': [347, 448, 725, 625, 760, 783, 66, 66, 303, 11, 303, 582, 2658, 1431, 752, 748, 2721, 1323], 'shape': [18, 4519]}}), ('IC0007W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6598833', 'shape': [848, 40]}, 'output': {'text': '有天坐巴士去赤柱以及去愉景湾参加朋友婚礼', 'tokenid': [107, 78, 128, 763, 788, 125, 533, 3101, 48, 435, 125, 1464, 266, 1917, 1326, 285, 477, 478, 838, 1411], 'shape': [20, 4519]}}), ('IC0091W0496', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.21.ark:1598749', 'shape': [848, 40]}, 'output': {'text': '上海国际和平妇幼保健院急诊科一名医生遭女患者殴打', 'tokenid': [54, 448, 468, 1166, 232, 1223, 300, 2204, 628, 563, 515, 455, 1677, 1523, 66, 444, 841, 25, 2600, 766, 2977, 281, 4393, 94], 'shape': [24, 4519]}}), ('IC0003W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8489898', 'shape': [847, 40]}, 'output': {'text': '五旬光棍相亲被骗红包持刀砍媒人获刑不愿赔偿', 'tokenid': [401, 2840, 452, 2277, 831, 615, 392, 1498, 457, 872, 1485, 1102, 2077, 2259, 72, 1963, 1233, 42, 1378, 2048, 3123], 'shape': [21, 4519]}}), ('ID0046W0427', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:4984794', 'shape': [845, 40]}, 'output': {'text': '从零岁的婴儿至旧会徽的设计者佐野研二郎均可以参加', 'tokenid': [371, 783, 796, 20, 2970, 249, 668, 2014, 174, 2000, 20, 683, 1305, 281, 1861, 485, 1918, 760, 2042, 671, 47, 48, 1326, 285], 'shape': [24, 4519]}}), ('IC0080W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5178300', 'shape': [844, 40]}, 'output': {'text': '三元桥启动换梁大修将在夜间占用三环进行施工', 'tokenid': [24, 1323, 1293, 1356, 323, 124, 827, 217, 698, 1611, 51, 696, 84, 1143, 261, 24, 402, 1566, 137, 1178, 776], 'shape': [21, 4519]}}), ('IC0080W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5269279', 'shape': [844, 40]}, 'output': {'text': '一艘渔船在长寿区长江川维码头附近触礁翻船', 'tokenid': [66, 2918, 3739, 1028, 51, 176, 1720, 396, 176, 356, 1334, 1843, 1189, 118, 18, 19, 2534, 3182, 1815, 1028], 'shape': [20, 4519]}}), ('IC0085W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2666234', 'shape': [842, 40]}, 'output': {'text': '三门峡泄洪村民冒险捞鱼被困警用直升机紧急救援', 'tokenid': [24, 192, 1528, 3428, 1416, 496, 12, 1296, 629, 3470, 932, 392, 1468, 949, 261, 1041, 403, 544, 1392, 455, 1347, 3814], 'shape': [22, 4519]}}), ('IC0096W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:8936948', 'shape': [841, 40]}, 'output': {'text': '上海小客车总量达三百二十万辆未来两年或增八十万辆', 'tokenid': [54, 448, 7, 1364, 129, 878, 150, 752, 24, 988, 760, 27, 1569, 1604, 1784, 154, 505, 303, 280, 1836, 777, 27, 1569, 1604], 'shape': [24, 4519]}}), ('IC0096W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9105539', 'shape': [841, 40]}, 'output': {'text': '因为前晚杨浦区市场监督管理局的一纸封条', 'tokenid': [377, 200, 931, 73, 337, 2013, 396, 22, 545, 2050, 2051, 897, 666, 1457, 20, 66, 2122, 1716, 1016], 'shape': [19, 4519]}}), ('IC0006W0160', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:1065543', 'shape': [840, 40]}, 'output': {'text': '本公司的房地产开发及进出口贸易业务将被全部剥离', 'tokenid': [105, 172, 929, 20, 1362, 52, 1300, 95, 570, 435, 1566, 56, 944, 2643, 82, 575, 1018, 1611, 392, 11, 1081, 3671, 899], 'shape': [23, 4519]}}), ('IC0080W0453', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5001869', 'shape': [840, 40]}, 'output': {'text': '三人网购他人身份证办信用卡卖四十多张获利万元', 'tokenid': [24, 72, 651, 111, 194, 72, 701, 858, 859, 165, 369, 261, 1007, 397, 366, 27, 147, 474, 1963, 526, 1569, 1323], 'shape': [22, 4519]}}), ('IC0002W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1664231', 'shape': [839, 40]}, 'output': {'text': '也通过大幅度调整对普通住宅总价认定标准的方式', 'tokenid': [229, 178, 420, 217, 3133, 139, 160, 658, 935, 971, 178, 381, 1597, 878, 399, 199, 87, 55, 35, 20, 292, 821], 'shape': [22, 4519]}}), ('IC0096W0325', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6469976', 'shape': [839, 40]}, 'output': {'text': '各地要将充电基础设施配套电网建设与改造项目', 'tokenid': [1115, 52, 99, 1611, 411, 30, 228, 2280, 683, 1178, 1460, 1866, 30, 651, 917, 683, 501, 630, 1855, 2447, 723], 'shape': [21, 4519]}}), ('ID0046W0472', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5820679', 'shape': [839, 40]}, 'output': {'text': '继续实施农业种子种苗种畜种禽免税进口优惠政策', 'tokenid': [2137, 1856, 1207, 1178, 1065, 575, 568, 393, 568, 1540, 568, 3616, 568, 3453, 890, 2064, 1566, 944, 112, 113, 2203, 2730], 'shape': [22, 4519]}}), ('IC0009W0395', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4089674', 'shape': [838, 40]}, 'output': {'text': '肯尼亚队以二金二银二铜在金牌榜和奖牌榜上暂时领跑', 'tokenid': [1534, 1608, 527, 1060, 48, 760, 497, 760, 659, 760, 1059, 51, 497, 1252, 1077, 232, 1173, 1252, 1077, 54, 1499, 83, 909, 1021], 'shape': [24, 4519]}}), ('IC0080W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4862071', 'shape': [838, 40]}, 'output': {'text': '丰台检察院以诈骗罪对胡某路某吴某提起公诉', 'tokenid': [1437, 67, 1568, 2256, 515, 48, 3373, 1498, 506, 935, 814, 71, 327, 71, 331, 71, 239, 404, 172, 260], 'shape': [20, 4519]}}), ('ID0043W0330', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3779769', 'shape': [838, 40]}, 'output': {'text': '五亿九千九百五十二万五千零九十五点五一', 'tokenid': [401, 2721, 419, 1242, 419, 988, 401, 27, 760, 1569, 401, 1242, 783, 419, 27, 401, 74, 401, 66], 'shape': [19, 4519]}}), ('IC0085W0283', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8597211', 'shape': [837, 40]}, 'output': {'text': '该市场在二零三零年之前规模将达到七百亿美元之巨', 'tokenid': [569, 22, 545, 51, 760, 783, 24, 783, 303, 339, 931, 1769, 820, 1611, 752, 75, 748, 988, 2721, 567, 1323, 339, 2541], 'shape': [23, 4519]}}), ('IC0002W0435', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7209787', 'shape': [836, 40]}, 'output': {'text': '五岁智障儿走失三百天母亲曾放弃寻找悔恨不已', 'tokenid': [401, 796, 1768, 1718, 249, 520, 716, 24, 988, 78, 2078, 615, 1935, 6, 1682, 1070, 388, 2787, 901, 42, 1197], 'shape': [21, 4519]}}), ('IC0080W0356', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3020101', 'shape': [836, 40]}, 'output': {'text': '那就是以三十二的微弱劣势不敌山东鲁能的老将张超', 'tokenid': [245, 220, 60, 48, 24, 27, 760, 20, 719, 846, 2609, 1551, 42, 2286, 523, 359, 1799, 177, 20, 282, 1611, 474, 21], 'shape': [23, 4519]}}), ('ID0023W0477', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:5542934', 'shape': [836, 40]}, 'output': {'text': '正式对外宣布队长惠若琪因身体原因将缺席前半个赛季', 'tokenid': [250, 821, 935, 215, 663, 985, 1060, 176, 113, 584, 1221, 377, 701, 451, 364, 377, 1611, 925, 2028, 931, 1170, 39, 1180, 1383], 'shape': [24, 4519]}}), ('IC0002W0362', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5797738', 'shape': [835, 40]}, 'output': {'text': '有望首先令杭州以及浙江当地的基建上市公司获益', 'tokenid': [107, 352, 207, 564, 2435, 580, 226, 48, 435, 2008, 356, 472, 52, 20, 228, 917, 54, 22, 172, 929, 1963, 2279], 'shape': [22, 4519]}}), ('IC0009W0232', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:976375', 'shape': [835, 40]}, 'output': {'text': '国际社会倾向于用完全不同于美国的眼光来看待中国', 'tokenid': [468, 1166, 1572, 174, 314, 543, 1398, 261, 37, 11, 42, 426, 1398, 567, 468, 20, 256, 452, 154, 121, 197, 347, 468], 'shape': [23, 4519]}}), ('IC0009W0375', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3696974', 'shape': [834, 40]}, 'output': {'text': '每支队伍派二名队员完成约八十米高的速降', 'tokenid': [469, 1168, 1060, 779, 753, 760, 444, 1060, 994, 37, 309, 686, 777, 27, 447, 246, 20, 62, 591], 'shape': [19, 4519]}}), ('ID0041W0488', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7110456', 'shape': [834, 40]}, 'output': {'text': '跑者应该有耐心循环渐进的延长跑步距离', 'tokenid': [1021, 281, 677, 569, 107, 1393, 163, 2469, 402, 1830, 1566, 20, 1158, 176, 1021, 345, 167, 899], 'shape': [18, 4519]}}), ('ID0043W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:1378790', 'shape': [834, 40]}, 'output': {'text': '我想要将AM设到一零六点九兆赫', 'tokenid': [17, 130, 99, 1611, '<unk>', '<unk>', 683, 75, 66, 783, 283, 74, 419, 2955, 2404], 'shape': [15, 4519]}}), ('ID0042W0474', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6176382', 'shape': [832, 40]}, 'output': {'text': '沈某一个月四次进入宗某家行窃重庆晚报讯家中被偷', 'tokenid': [1610, 71, 66, 39, 745, 366, 1204, 1566, 346, 597, 71, 417, 137, 3861, 573, 834, 73, 326, 1992, 417, 347, 392, 1344], 'shape': [23, 4519]}}), ('IC0007W0451', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7066999', 'shape': [831, 40]}, 'output': {'text': '宿迁市宿城公警官破获一冒充军官跨省诈骗团伙', 'tokenid': [1283, 2508, 22, 1283, 480, 172, 949, 1234, 1319, 1963, 66, 1296, 411, 773, 1234, 1659, 514, 3373, 1498, 110, 684], 'shape': [21, 4519]}}), ('ID0040W0499', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.39.ark:6918320', 'shape': [831, 40]}, 'output': {'text': '顾客消费签帐达一定金额可捐款公益金换购雨伞与雨衣', 'tokenid': [822, 1364, 1012, 1005, 1959, 2119, 752, 66, 87, 497, 170, 47, 4094, 104, 172, 2279, 497, 124, 111, 218, 1445, 501, 218, 40], 'shape': [24, 4519]}}), ('IC0080W0434', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4581482', 'shape': [830, 40]}, 'output': {'text': '三亚海滩中秋夜垃圾八十吨台媒十米一个垃圾桶', 'tokenid': [24, 527, 448, 2109, 347, 1424, 696, 1330, 1331, 777, 27, 2853, 67, 2259, 27, 447, 66, 39, 1330, 1331, 1764], 'shape': [21, 4519]}}), ('IC0080W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5823210', 'shape': [830, 40]}, 'output': {'text': '三名九零后酒后殴打出租司机致死逃窜三年被抓获', 'tokenid': [24, 444, 419, 783, 114, 871, 114, 4393, 94, 56, 2125, 929, 544, 1054, 502, 2023, 3997, 24, 303, 392, 1955, 1963], 'shape': [22, 4519]}})][('IC0096W0217', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4231612', 'shape': [830, 40]}, 'output': {'text': '问这次审计中的新农合资金是由卫生部管理的', 'tokenid': [77, 9, 1204, 2052, 1305, 347, 20, 184, 1065, 45, 1563, 497, 60, 1092, 823, 25, 1081, 897, 666, 20], 'shape': [20, 4519]}}), ('ID0023W0422', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:4555552', 'shape': [830, 40]}, 'output': {'text': '位于冕宁县灵山景区有七名宿营大学生因突发暴雨被困', 'tokenid': [57, 1398, 2973, 1069, 307, 1648, 523, 266, 396, 107, 748, 444, 1283, 1942, 217, 238, 25, 377, 789, 570, 513, 218, 392, 1468], 'shape': [24, 4519]}}), ('IC0002W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2972068', 'shape': [828, 40]}, 'output': {'text': '这种模式不能保证所采购产品是市场上最好的产品', 'tokenid': [9, 568, 820, 821, 42, 177, 628, 859, 434, 2394, 111, 1300, 490, 60, 22, 545, 54, 208, 120, 20, 1300, 490], 'shape': [22, 4519]}}), ('IC0002W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8293437', 'shape': [828, 40]}, 'output': {'text': '备受社会关注的新乡小冀镇五岁男孩王明涵失踪案告破', 'tokenid': [36, 504, 1572, 174, 179, 438, 20, 184, 495, 7, 3150, 819, 401, 796, 995, 866, 595, 308, 1183, 716, 2438, 566, 33, 1319], 'shape': [24, 4519]}}), ('IC0002W0349', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5524709', 'shape': [827, 40]}, 'output': {'text': '虽然杭州将为亚运会投入多少还没有官方声音流出', 'tokenid': [943, 408, 580, 226, 1611, 200, 527, 625, 174, 49, 346, 147, 148, 146, 171, 107, 1234, 292, 182, 152, 149, 56], 'shape': [22, 4519]}}), ('ID0024W0424', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4436084', 'shape': [827, 40]}, 'output': {'text': '贵阳市政府正在为远程医疗纳入医保范围给予政策支持', 'tokenid': [1600, 600, 22, 2203, 1146, 250, 51, 200, 306, 1590, 841, 3167, 2782, 346, 841, 628, 1219, 746, 70, 3422, 2203, 2730, 1168, 1485], 'shape': [24, 4519]}}), ('ID0047W0508', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:6957886', 'shape': [827, 40]}, 'output': {'text': '虽然在奥斯卡影帝角逐中败给了八零后埃迪雷德梅恩', 'tokenid': [943, 408, 51, 316, 1061, 1007, 31, 727, 1927, 3382, 347, 2894, 70, 63, 777, 783, 114, 3351, 734, 467, 621, 835, 342], 'shape': [23, 4519]}}), ('IC0009W0046', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:6749218', 'shape': [826, 40]}, 'output': {'text': '找下YESTERDAYINCEMORE', 'tokenid': [388, 142, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>'], 'shape': [19, 4519]}}), ('ID0026W0390', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.35.ark:3920047', 'shape': [826, 40]}, 'output': {'text': '荆楚网消息记者周三春通讯员文波九月九日上午', 'tokenid': [3269, 1762, 651, 1012, 997, 470, 281, 227, 24, 647, 178, 1992, 994, 772, 1735, 419, 745, 419, 521, 54, 966], 'shape': [21, 4519]}}), ('IC0080W0169', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9036911', 'shape': [825, 40]}, 'output': {'text': '河北省张金龙律师事务所副主任王罡律师认为', 'tokenid': [223, 224, 514, 474, 497, 1062, 320, 578, 363, 1018, 434, 2195, 824, 611, 595, 4222, 320, 578, 199, 200], 'shape': [20, 4519]}}), ('ID0043W0322', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3630545', 'shape': [823, 40]}, 'output': {'text': '八亿七千八百二十六万四千八百四十', 'tokenid': [777, 2721, 748, 1242, 777, 988, 760, 27, 283, 1569, 366, 1242, 777, 988, 366, 27], 'shape': [16, 4519]}}), ('IC0009W0140', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8060360', 'shape': [822, 40]}, 'output': {'text': '从业绩报告的显示评级机构的认定和行业测评的颁奖', 'tokenid': [371, 575, 1581, 326, 33, 20, 862, 948, 1727, 1107, 544, 1594, 20, 199, 87, 232, 137, 575, 2126, 1727, 20, 3370, 1173], 'shape': [23, 4519]}}), ('IC0009W0406', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4385277', 'shape': [821, 40]}, 'output': {'text': '结果因此操作时差而导致阿汤哥面临溺死水中的危险', 'tokenid': [893, 237, 377, 1099, 536, 547, 83, 460, 1044, 85, 1054, 373, 2207, 891, 216, 1157, 3390, 502, 98, 347, 20, 2486, 629], 'shape': [23, 4519]}}), ('IC0085W0275', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8421347', 'shape': [820, 40]}, 'output': {'text': 'ULA所使用的火箭搭载的是俄制RD一百八十发动机', 'tokenid': ['<unk>', '<unk>', '<unk>', 434, 654, 261, 20, 423, 3400, 2268, 258, 20, 60, 1698, 278, '<unk>', '<unk>', 66, 988, 777, 27, 570, 323, 544], 'shape': [24, 4519]}}), ('IC0002W0178', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2349226', 'shape': [819, 40]}, 'output': {'text': '打破过去在外汇管理跨境资金流动方面的多重限制', 'tokenid': [94, 1319, 420, 125, 51, 215, 2018, 897, 666, 1659, 2332, 1563, 497, 149, 323, 292, 216, 20, 147, 573, 550, 278], 'shape': [22, 4519]}}), ('IC0003W0147', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1987536', 'shape': [819, 40]}, 'output': {'text': '似乎是房企目前得到的能最快最直接产生效果的合作', 'tokenid': [882, 1883, 60, 1362, 3202, 723, 931, 471, 75, 20, 177, 208, 214, 208, 1041, 877, 1300, 25, 1074, 237, 20, 45, 547], 'shape': [23, 4519]}}), ('IC0009W0450', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5201089', 'shape': [819, 40]}, 'output': {'text': '六旬农妇建民宿月入十万立约游客刻字罚放羊', 'tokenid': [283, 2840, 1065, 300, 917, 12, 1283, 745, 346, 27, 1569, 1369, 686, 661, 1364, 1407, 445, 2030, 6, 709], 'shape': [20, 4519]}}), ('IC0006W0466', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:6576241', 'shape': [818, 40]}, 'output': {'text': '百分之六十八点七受访者不会以偶像剧主角为择偶标准', 'tokenid': [988, 421, 339, 283, 27, 777, 74, 748, 504, 2386, 281, 42, 174, 48, 1310, 117, 430, 824, 1927, 200, 1681, 1310, 55, 35], 'shape': [24, 4519]}}), ('IC0008W0382', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4627495', 'shape': [818, 40]}, 'output': {'text': '牙买加选手理查兹二十一米六九刷新国家纪录获得季军', 'tokenid': [1335, 616, 285, 687, 241, 666, 80, 2701, 760, 27, 66, 447, 283, 419, 1696, 184, 468, 417, 1694, 290, 1963, 471, 1383, 773], 'shape': [24, 4519]}}), ('IC0080W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4452397', 'shape': [818, 40]}, 'output': {'text': '每晚陪睡价码二十万元台币约三点九万人民币起跳', 'tokenid': [469, 73, 916, 791, 399, 1189, 760, 27, 1569, 1323, 67, 2647, 686, 24, 74, 419, 1569, 72, 12, 2647, 404, 1635], 'shape': [22, 4519]}}), ('ID0024W0421', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4355745', 'shape': [818, 40]}, 'output': {'text': '研究者现在还想要创建更加大型的和小型的四D印制物', 'tokenid': [1918, 1919, 281, 136, 51, 146, 130, 99, 1299, 917, 557, 285, 217, 680, 20, 232, 7, 680, 20, 366, '<unk>', 1500, 278, 132], 'shape': [24, 4519]}}), ('ID0043W0303', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3196718', 'shape': [818, 40]}, 'output': {'text': '三亿零五百七十零九千二百八十七', 'tokenid': [24, 2721, 783, 401, 988, 748, 27, 783, 419, 1242, 760, 988, 777, 27, 748], 'shape': [15, 4519]}}), ('IC0007W0123', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:133415', 'shape': [817, 40]}, 'output': {'text': '新湖中宝继去年入股温州银行抢食金改红利后', 'tokenid': [184, 464, 347, 778, 2137, 125, 303, 346, 950, 161, 226, 659, 137, 2037, 1557, 497, 630, 457, 526, 114], 'shape': [20, 4519]}}), ('IC0007W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6121728', 'shape': [817, 40]}, 'output': {'text': '今日发布了长达二分四十一秒的国剧版长全长预告', 'tokenid': [267, 521, 570, 985, 63, 176, 752, 760, 421, 366, 27, 66, 1613, 20, 468, 430, 483, 176, 11, 176, 32, 33], 'shape': [22, 4519]}}), ('IC0083W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:3833647', 'shape': [817, 40]}, 'output': {'text': '三名醉酒男子持刀捅伤一名出租车司机后骑摩托车逃跑', 'tokenid': [24, 444, 532, 871, 995, 393, 1485, 1102, 3525, 209, 66, 444, 56, 2125, 129, 929, 544, 114, 1267, 1999, 1083, 129, 2023, 1021], 'shape': [24, 4519]}}), ('IC0096W0228', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4463655', 'shape': [817, 40]}, 'output': {'text': '农村居民因病致贫因病返贫的状况得到很大缓解', 'tokenid': [1065, 496, 1263, 12, 377, 843, 1054, 3155, 377, 843, 1537, 3155, 20, 1776, 473, 471, 75, 244, 217, 1029, 1365], 'shape': [21, 4519]}}), ('IC0007W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7187124', 'shape': [816, 40]}, 'output': {'text': '六分钟吃掉一点五斤凉菜大妈获一千六百六十六元奖励', 'tokenid': [283, 421, 1014, 853, 1337, 66, 74, 401, 2589, 1162, 1353, 217, 939, 1963, 66, 1242, 283, 988, 283, 27, 283, 1323, 1173, 1938], 'shape': [24, 4519]}}), ('IC0008W0165', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:645094', 'shape': [816, 40]}, 'output': {'text': '则是宝龙在二零一四动荡之年成功逆势稳企企稳的重要举措', 'tokenid': [2134, 60, 778, 1062, 51, 760, 783, 66, 366, 323, 1827, 339, 303, 309, 1100, 1176, 1551, 1737, 3202, 3202, 1737, 20, 573, 99, 453, 2592], 'shape': [26, 4519]}}), ('IC0003W0436', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7942673', 'shape': [815, 40]}, 'output': {'text': '五岁男童遭患癌继母连砍三十多刀经抢救已脱险', 'tokenid': [401, 796, 995, 1068, 2600, 2977, 3647, 2137, 2078, 656, 2077, 24, 27, 147, 1102, 367, 2037, 1347, 1197, 97, 629], 'shape': [21, 4519]}}), ('IC0009W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5596130', 'shape': [815, 40]}, 'output': {'text': '六旬翁致已婚女怀孕生子女子瞒丈夫十四年被发现', 'tokenid': [283, 2840, 3281, 1054, 1197, 838, 766, 1257, 3070, 25, 393, 766, 393, 1862, 3854, 1066, 27, 366, 303, 392, 570, 136], 'shape': [22, 4519]}}), ('IC0085W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2998969', 'shape': [815, 40]}, 'output': {'text': '上半年中国农民工数量增百分之一年轻农民工缺口明显', 'tokenid': [54, 1170, 303, 347, 468, 1065, 12, 776, 1131, 150, 1836, 988, 421, 339, 66, 303, 443, 1065, 12, 776, 925, 944, 308, 862], 'shape': [24, 4519]}}), ('IC0097W0017', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:279411', 'shape': [815, 40]}, 'output': {'text': '西单商场天通苑购物中心天通西苑社区卫生服务站东', 'tokenid': [873, 159, 1510, 545, 78, 178, 1570, 111, 132, 347, 163, 78, 178, 873, 1570, 1572, 396, 823, 25, 41, 1018, 774, 359], 'shape': [23, 4519]}})]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0085W0214', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:7183574', 'shape': [879, 40]}, 'output': {'text': '某保险公司权益投资部总经理曾某借职务之便建老鼠仓', 'tokenid': [71, 628, 629, 172, 929, 1480, 2279, 49, 1563, 1081, 878, 367, 666, 1935, 71, 714, 2124, 1018, 339, 293, 917, 282, 1441, 2852], 'shape': [24, 4519]}}), ('IC0009W0360', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3432159', 'shape': [878, 40]}, 'output': {'text': '首先由带有浓浓山海关特色的项目一古城定向开场', 'tokenid': [207, 564, 1092, 494, 107, 2702, 2702, 523, 448, 179, 336, 724, 20, 2447, 723, 66, 1128, 480, 87, 543, 95, 545], 'shape': [22, 4519]}}), ('IC0007W0129', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:297573', 'shape': [877, 40]}, 'output': {'text': '持股比例直逼宁波银行第一大股东新加坡华侨银行', 'tokenid': [1485, 950, 268, 454, 1041, 2357, 1069, 1735, 659, 137, 254, 66, 217, 950, 359, 184, 285, 1853, 562, 2989, 659, 137], 'shape': [22, 4519]}}), ('ID0020W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.29.ark:4167261', 'shape': [877, 40]}, 'output': {'text': '浙江杀人嫌犯潜逃一七年持兄长身份证乘地铁被抓', 'tokenid': [2008, 356, 531, 72, 2129, 2270, 2474, 2023, 66, 748, 303, 1485, 589, 176, 701, 858, 859, 1771, 52, 1261, 392, 1955], 'shape': [22, 4519]}}), ('ID0043W0520', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:7043799', 'shape': [877, 40]}, 'output': {'text': '严禁骗取套取中央和省级财政城市棚户区改造专项资金', 'tokenid': [1313, 1386, 1498, 1011, 1866, 1011, 347, 809, 232, 514, 1107, 1715, 2203, 480, 22, 1171, 168, 396, 630, 1855, 204, 2447, 1563, 497], 'shape': [24, 4519]}}), ('IC0080W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5212413', 'shape': [875, 40]}, 'output': {'text': '三兄弟驾渔船长江翻船老大漂流十余公里获救', 'tokenid': [24, 589, 590, 1772, 3739, 1028, 176, 356, 1815, 1028, 282, 217, 412, 149, 27, 169, 172, 10, 1963, 1347], 'shape': [20, 4519]}}), ('IC0009W0126', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7748858', 'shape': [873, 40]}, 'output': {'text': '以及类酒店产品宝龙客栈及宝龙少海房车露营地', 'tokenid': [48, 435, 340, 871, 106, 1300, 490, 778, 1062, 1364, 3147, 435, 778, 1062, 148, 448, 1362, 129, 1491, 1942, 52], 'shape': [21, 4519]}}), ('IC0007W0454', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7133698', 'shape': [872, 40]}, 'output': {'text': '六内地乘客打伤七香港机场的勤续四人被判处监禁', 'tokenid': [283, 519, 52, 1771, 1364, 94, 209, 748, 1113, 1135, 544, 545, 20, 1754, 1856, 366, 72, 392, 1786, 665, 2050, 1386], 'shape': [22, 4519]}}), ('IC0080W0127', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:8159405', 'shape': [872, 40]}, 'output': {'text': '野三坡风景名胜区范围为为东北方向与北京市相邻', 'tokenid': [485, 24, 1853, 145, 266, 444, 1148, 396, 1219, 746, 200, 200, 359, 224, 292, 543, 501, 224, 360, 22, 831, 3331], 'shape': [22, 4519]}}), ('IC0096W0374', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7466033', 'shape': [872, 40]}, 'output': {'text': '东道主福州队的徐婷婷摘得女子飞碟双向桂冠', 'tokenid': [359, 44, 824, 642, 226, 1060, 20, 856, 1870, 1870, 3220, 471, 766, 393, 406, 2400, 255, 543, 1761, 1462], 'shape': [20, 4519]}}), ('IC0007W0128', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:262380', 'shape': [871, 40]}, 'output': {'text': '雅戈尔持有宁波银行总股份达三点五一亿股', 'tokenid': [498, 2812, 529, 1485, 107, 1069, 1735, 659, 137, 878, 950, 858, 752, 24, 74, 401, 66, 2721, 950], 'shape': [19, 4519]}}), ('ID0043W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5277342', 'shape': [868, 40]}, 'output': {'text': '而张继科则是拿下了生涯第六个全锦赛男单奖牌', 'tokenid': [1044, 474, 2137, 1523, 2134, 60, 933, 142, 63, 25, 2319, 254, 283, 39, 11, 1298, 1180, 995, 159, 1173, 1252], 'shape': [21, 4519]}}), ('IC0080W0432', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4527736', 'shape': [867, 40]}, 'output': {'text': '三亚景区售玳瑁三万元两只二月份曾被立案调查', 'tokenid': [24, 527, 266, 396, 978, '<unk>', '<unk>', 24, 1569, 1323, 505, 96, 760, 745, 858, 1935, 392, 1369, 566, 160, 80], 'shape': [21, 4519]}}), ('IC0007W0168', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:1098660', 'shape': [866, 40]}, 'output': {'text': '信贷和公积金政策的逐渐宽松对市场而言是较大的利好', 'tokenid': [369, 2975, 232, 172, 1621, 497, 2203, 2730, 20, 3382, 1830, 1790, 354, 935, 22, 545, 1044, 4, 60, 581, 217, 20, 526, 120], 'shape': [24, 4519]}}), ('IC0080W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4096163', 'shape': [865, 40]}, 'output': {'text': '中国新闻网七月二十三日报道据台湾东森新闻消息', 'tokenid': [347, 468, 184, 185, 651, 748, 745, 760, 27, 24, 521, 326, 44, 1775, 67, 1917, 359, 1505, 184, 185, 1012, 997], 'shape': [22, 4519]}}), ('IC0007W0481', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7810269', 'shape': [863, 40]}, 'output': {'text': '六岁女童村中被扎身亡货车扎头部后仍继续行驶', 'tokenid': [283, 796, 766, 1068, 496, 347, 392, 2387, 701, 2024, 996, 129, 2387, 118, 1081, 114, 3031, 2137, 1856, 137, 138], 'shape': [21, 4519]}}), ('IC0080W0385', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3581018', 'shape': [863, 40]}, 'output': {'text': '难遇天才竟急速沉沦张继科在负面新闻中渐行渐远', 'tokenid': [937, 699, 78, 673, 2155, 455, 62, 1797, 2855, 474, 2137, 1523, 51, 731, 216, 184, 185, 347, 1830, 137, 1830, 306], 'shape': [22, 4519]}}), ('IC0003W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1890364', 'shape': [858, 40]}, 'output': {'text': '怎么完成传统房企与互联网融合落地都是其研究的大事', 'tokenid': [101, 109, 37, 309, 664, 1072, 1362, 3202, 501, 2806, 1008, 651, 2478, 45, 717, 52, 173, 60, 1144, 1918, 1919, 20, 217, 363], 'shape': [24, 4519]}}), ('IC0080W0443', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4757299', 'shape': [858, 40]}, 'output': {'text': '三亚餐厅扇贝十五元一份变十五元一个续同意退款', 'tokenid': [24, 527, 884, 1542, 2789, 863, 27, 401, 1323, 66, 858, 730, 27, 401, 1323, 66, 39, 1856, 426, 439, 627, 104], 'shape': [22, 4519]}}), ('ID0047W0379', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4383869', 'shape': [858, 40]}, 'output': {'text': '索玛基金会理事长黄红斌被当地森林公安带走', 'tokenid': [16, 2017, 228, 497, 174, 666, 363, 176, 489, 457, 816, 392, 472, 52, 1505, 593, 172, 720, 494, 520], 'shape': [20, 4519]}}), ('IC0080W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4918257', 'shape': [857, 40]}, 'output': {'text': '华商报咸阳讯记者张林三小伙伴相约出去玩水', 'tokenid': [562, 1510, 326, 3364, 600, 1992, 470, 281, 474, 593, 24, 7, 684, 409, 831, 686, 56, 125, 221, 98], 'shape': [20, 4519]}}), ('IC0087W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:5112895', 'shape': [855, 40]}, 'output': {'text': '国家统计局今日发布七十大中城市房价变动情况', 'tokenid': [468, 417, 1072, 1305, 1457, 267, 521, 570, 985, 748, 27, 217, 347, 480, 22, 1362, 399, 730, 323, 164, 473], 'shape': [21, 4519]}}), ('IC0097W0163', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:2539429', 'shape': [855, 40]}, 'output': {'text': '二零一五年上半年又经历了三次降息两次降准调整力度堪比二零零八年', 'tokenid': [760, 783, 66, 401, 303, 54, 1170, 303, 324, 367, 1285, 63, 24, 1204, 591, 997, 505, 1204, 591, 35, 160, 658, 270, 139, 3053, 268, 760, 783, 783, 777, 303], 'shape': [31, 4519]}}), ('IC0096W0479', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9619418', 'shape': [854, 40]}, 'output': {'text': '上海崇明通报村官腐案虚开发票套取公款等', 'tokenid': [54, 448, 1960, 308, 178, 326, 496, 1234, 1595, 566, 2475, 95, 570, 626, 1866, 1011, 172, 104, 537], 'shape': [19, 4519]}}), ('ID0041W0399', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5125879', 'shape': [854, 40]}, 'output': {'text': '现在微传科技也积极在海外对自己的技术进行保护', 'tokenid': [136, 51, 719, 664, 1523, 1555, 229, 1621, 1089, 51, 448, 215, 935, 275, 276, 20, 1555, 1032, 1566, 137, 628, 1670], 'shape': [22, 4519]}}), ('IC0003W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8134883', 'shape': [853, 40]}, 'output': {'text': '孩子的父亲在手术室外痛哭昨日下午二时三十分左右', 'tokenid': [866, 393, 20, 614, 615, 51, 241, 1032, 782, 215, 634, 1447, 605, 521, 142, 966, 760, 83, 24, 27, 421, 1824, 2063], 'shape': [23, 4519]}}), ('IC0003W0480', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8949725', 'shape': [853, 40]}, 'output': {'text': '五男孩去玩水一人溺亡小伙伴将其物品扔掉隐瞒', 'tokenid': [401, 995, 866, 125, 221, 98, 66, 72, 3390, 2024, 7, 684, 409, 1611, 1144, 132, 490, 2880, 1337, 2196, 1862], 'shape': [21, 4519]}}), ('IC0002W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1716617', 'shape': [852, 40]}, 'output': {'text': '而另外一剂楼市强心剂则当属央行的不对称降息', 'tokenid': [1044, 2092, 215, 66, 3040, 1294, 22, 844, 163, 3040, 2134, 472, 620, 809, 137, 20, 42, 935, 938, 591, 997], 'shape': [21, 4519]}}), ('IC0003W0482', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9006551', 'shape': [852, 40]}, 'output': {'text': '周至警方初步排除他杀可能一当事孩子称', 'tokenid': [227, 668, 949, 292, 1795, 345, 144, 587, 194, 531, 47, 177, 66, 472, 363, 866, 393, 938], 'shape': [18, 4519]}}), ('IC0007W0253', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2915505', 'shape': [851, 40]}, 'output': {'text': '透明窗式显示屏交互式虚拟镜子以及拍照手机等', 'tokenid': [1554, 308, 770, 821, 862, 948, 1612, 81, 2806, 821, 2475, 3479, 1385, 393, 48, 435, 1848, 1259, 241, 544, 537], 'shape': [21, 4519]}}), ('IC0084W0154', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:6790101', 'shape': [850, 40]}, 'output': {'text': '五十四个城市住宅签约总量达十九万四千九百零九套', 'tokenid': [401, 27, 366, 39, 480, 22, 381, 1597, 1959, 686, 878, 150, 752, 27, 419, 1569, 366, 1242, 419, 988, 783, 419, 1866], 'shape': [23, 4519]}}), ('IC0009W0452', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5256635', 'shape': [849, 40]}, 'output': {'text': '六旬大妈伪造签证多次赴美涉偷越国境被判管制一年', 'tokenid': [283, 2840, 217, 939, 1515, 1855, 1959, 859, 147, 1204, 3497, 567, 1743, 1344, 446, 468, 2332, 392, 1786, 897, 278, 66, 303], 'shape': [23, 4519]}})]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : None\n"
     ]
    }
   ],
   "source": [
    "for i, (data) in enumerate(te_loader):\n",
    "    print(str(i)+\" : \"+str(data))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('IC0096W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:5912144', 'shape': [1325, 40]}, 'output': {'text': '就是加入了P点对点对等网络传输的更新方式', 'tokenid': [220, 60, 285, 346, 63, '<unk>', 74, 935, 74, 935, 537, 651, 652, 664, 1746, 20, 557, 184, 292, 821], 'shape': [20, 4519]}}), ('IC0094W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:36078', 'shape': [1166, 40]}, 'output': {'text': '王辰澎湃资料八月十八日下午二点二十分左右在运行时梯街突然跳起', 'tokenid': [595, 1150, 2890, 3877, 1563, 1564, 777, 745, 27, 777, 521, 142, 966, 760, 74, 760, 27, 421, 1824, 2063, 51, 625, 137, 83, 1309, 127, 789, 408, 1635, 404], 'shape': [30, 4519]}}), ('ID0042W0471', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6090243', 'shape': [1146, 40]}, 'output': {'text': '但作为深圳国资委旗下最大的房产房地产但作为深圳国资委旗下最大的房地产公司', 'tokenid': [552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 1300, 1362, 52, 1300, 552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 52, 1300, 172, 929], 'shape': [36, 4519]}}), ('IC0007W0369', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5394213', 'shape': [1101, 40]}, 'output': {'text': '他的主要对手是美国是田径锦赛锦标赛的金牌得主乔科斯瓦斯', 'tokenid': [194, 20, 824, 99, 935, 241, 60, 567, 468, 60, 484, 2219, 1298, 1180, 1298, 55, 1180, 20, 497, 1252, 471, 824, 624, 1523, 1061, 914, 1061], 'shape': [27, 4519]}}), ('IC0080W0192', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9525870', 'shape': [1059, 40]}, 'output': {'text': '积极稳妥探索水利建设贷款等涉农贷款资产证券化试点', 'tokenid': [1621, 1089, 1737, 2257, 1503, 16, 98, 526, 917, 683, 2975, 104, 537, 1743, 1065, 2975, 104, 1563, 1300, 859, 952, 1351, 930, 74], 'shape': [24, 4519]}}), ('IC0003W0027', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8932830', 'shape': [1057, 40]}, 'output': {'text': '万达广场中共北京市石景山区委员会东', 'tokenid': [1569, 752, 365, 545, 347, 503, 224, 360, 22, 912, 266, 523, 396, 1721, 994, 174, 359], 'shape': [17, 4519]}}), ('IC0003W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9076930', 'shape': [1043, 40]}, 'output': {'text': '五种高价中药材价格普降超两成一半品种都在下跌', 'tokenid': [401, 568, 246, 399, 347, 876, 1605, 399, 400, 971, 591, 21, 505, 309, 66, 1170, 490, 568, 173, 51, 142, 2462], 'shape': [22, 4519]}}), ('IC0095W0392', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:7196508', 'shape': [1032, 40]}, 'output': {'text': '拿过上海市五一杯长跑比赛冠军杨浦区迎春长跑冠军', 'tokenid': [933, 420, 54, 448, 22, 401, 66, 1638, 176, 1021, 268, 1180, 1462, 773, 337, 2013, 396, 2342, 647, 176, 1021, 1462, 773], 'shape': [23, 4519]}}), ('IC0003W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8547324', 'shape': [1029, 40]}, 'output': {'text': '五村官私分七二一救灾款获刑获刑两年半至三年', 'tokenid': [401, 496, 1234, 1589, 421, 748, 760, 66, 1347, 3517, 104, 1963, 1233, 1963, 1233, 505, 303, 1170, 668, 24, 303], 'shape': [21, 4519]}}), ('IC0080W0194', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9594096', 'shape': [1029, 40]}, 'output': {'text': '建立金融支持水利改革发展的风险分散和政策保障机制', 'tokenid': [917, 1369, 497, 2478, 1168, 1485, 98, 526, 630, 1592, 570, 2053, 20, 145, 629, 421, 1379, 232, 2203, 2730, 628, 1718, 544, 278], 'shape': [24, 4519]}}), ('ID0043W0445', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5565524', 'shape': [1016, 40]}, 'output': {'text': '三盗墓者酒后捅伤的哥一人背部文有钟馗图案', 'tokenid': [24, 1765, 2016, 281, 871, 114, 3525, 209, 20, 891, 66, 72, 265, 1081, 772, 107, 1014, 4268, 53, 566], 'shape': [20, 4519]}}), ('ID0021W0389', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.30.ark:3999585', 'shape': [1012, 40]}, 'output': {'text': '二零一四财年收入下降四点百分之八至二二七八百六五十二亿日元', 'tokenid': [760, 783, 66, 366, 1715, 303, 131, 346, 142, 591, 366, 74, 988, 421, 339, 777, 668, 760, 760, 748, 777, 988, 283, 401, 27, 760, 2721, 521, 1323], 'shape': [29, 4519]}}), ('IC0003W0352', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:6187541', 'shape': [1009, 40]}, 'output': {'text': '它们分别是布达佩斯汉堡洛杉矶巴黎和罗马', 'tokenid': [262, 198, 421, 279, 60, 985, 752, 1376, 1061, 355, 2648, 273, 2015, 3882, 763, 1228, 232, 1127, 643], 'shape': [19, 4519]}}), ('IC0007W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5268808', 'shape': [1005, 40]}, 'output': {'text': '二零一三年世锦赛铜牌的得主卢克斯梅里赫也不容忽视', 'tokenid': [760, 783, 66, 24, 303, 26, 1298, 1180, 1059, 1252, 20, 471, 824, 1138, 317, 1061, 835, 10, 2404, 229, 42, 603, 1923, 93], 'shape': [24, 4519]}}), ('IC0006W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:7056553', 'shape': [1001, 40]}, 'output': {'text': '六十九岁独腿环卫工拄拐杖扫大街七年半小时扫三百米', 'tokenid': [283, 27, 419, 796, 335, 1995, 402, 823, 776, '<unk>', 2424, 3955, 1363, 217, 127, 748, 303, 1170, 7, 83, 1363, 24, 988, 447], 'shape': [24, 4519]}}), ('ID0043W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5971158', 'shape': [985, 40]}, 'output': {'text': '香港航空HX三零四飞往北京的航班延误九小时', 'tokenid': [1113, 1135, 86, 264, '<unk>', '<unk>', 24, 783, 366, 406, 1675, 224, 360, 20, 86, 143, 1158, 1159, 419, 7, 83], 'shape': [21, 4519]}}), ('IC0096W0323', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6414270', 'shape': [980, 40]}, 'output': {'text': '每二千辆电动汽车至少配套建设一座公交充电站', 'tokenid': [469, 760, 1242, 1604, 30, 323, 1306, 129, 668, 148, 1460, 1866, 917, 683, 66, 69, 172, 81, 411, 30, 774], 'shape': [21, 4519]}}), ('IC0009W0387', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3931850', 'shape': [966, 40]}, 'output': {'text': '山海关区旅游局山海关区体育局以及各相关单位承办', 'tokenid': [523, 448, 179, 396, 1571, 661, 1457, 523, 448, 179, 396, 451, 1184, 1457, 48, 435, 1115, 831, 179, 159, 57, 1121, 165], 'shape': [23, 4519]}}), ('IC0007W0478', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7739130', 'shape': [962, 40]}, 'output': {'text': '六岁女孩肺功能衰竭如八旬老人病因成谜呼气吃力', 'tokenid': [283, 796, 766, 866, 1588, 1100, 177, 2870, 2700, 236, 777, 2840, 282, 72, 843, 377, 309, 3038, 1110, 79, 853, 270], 'shape': [22, 4519]}}), ('IC0002W0363', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5831491', 'shape': [959, 40]}, 'output': {'text': '精工钢构亚厦股份杭钢股份等值得关注', 'tokenid': [1598, 776, 440, 1594, 527, 1846, 950, 858, 580, 440, 950, 858, 537, 883, 471, 179, 438], 'shape': [17, 4519]}}), ('ID0043W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3286257', 'shape': [959, 40]}, 'output': {'text': '九亿五千九百七十四万两千四百零五点一三', 'tokenid': [419, 2721, 401, 1242, 419, 988, 748, 27, 366, 1569, 505, 1242, 366, 988, 783, 401, 74, 66, 24], 'shape': [19, 4519]}}), ('IC0009W0414', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4539381', 'shape': [955, 40]}, 'output': {'text': '搜狐娱乐讯刘嘉玲日前戴上半亿珠宝现身北京出席活动', 'tokenid': [15, 2159, 183, 157, 1992, 583, 1108, 1924, 521, 931, 1849, 54, 1170, 2721, 1791, 778, 136, 701, 224, 360, 56, 2028, 1584, 323], 'shape': [24, 4519]}}), ('ID0041W0470', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:6703502', 'shape': [955, 40]}, 'output': {'text': '而六月居冠的天河北成交宗数下近百分之二', 'tokenid': [1044, 283, 745, 1263, 1462, 20, 78, 223, 224, 309, 81, 597, 1131, 142, 19, 988, 421, 339, 760], 'shape': [19, 4519]}}), ('IC0009W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5629083', 'shape': [954, 40]}, 'output': {'text': '六十五岁的通许县人潘安在郑州市做学生课桌椅加工生意', 'tokenid': [283, 27, 401, 796, 20, 178, 540, 307, 72, 2111, 720, 51, 201, 226, 22, 263, 238, 25, 353, 1284, 1975, 285, 776, 25, 439], 'shape': [25, 4519]}}), ('IC0007W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7463127', 'shape': [952, 40]}, 'output': {'text': '他们结伴去位于觐州奉化交界的卖柴岳徒步', 'tokenid': [194, 198, 893, 409, 125, 57, 1398, '<unk>', 226, 2944, 1351, 81, 1125, 20, 397, 1118, 1501, 1905, 345], 'shape': [19, 4519]}}), ('IC0096W0122', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2248237', 'shape': [950, 40]}, 'output': {'text': '不仅是北京上海广州等一线城市地块溢价率在走高', 'tokenid': [42, 1891, 60, 224, 360, 54, 448, 365, 226, 537, 66, 284, 480, 22, 52, 907, 1989, 399, 1593, 51, 520, 246], 'shape': [22, 4519]}}), ('ID0046W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5373814', 'shape': [947, 40]}, 'output': {'text': '自自己只是把二传手们喜欢的两次吊球变成两次进攻而已', 'tokenid': [275, 275, 276, 96, 60, 92, 760, 664, 241, 198, 88, 89, 20, 505, 1204, 3191, 1039, 730, 309, 505, 1204, 1566, 845, 1044, 1197], 'shape': [25, 4519]}}), ('IC0009W0166', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8596778', 'shape': [934, 40]}, 'output': {'text': '同比增长百分之二十一点七净资产均值为二百零二点三亿元', 'tokenid': [426, 268, 1836, 176, 988, 421, 339, 760, 27, 66, 74, 748, 582, 1563, 1300, 671, 883, 200, 760, 988, 783, 760, 74, 24, 2721, 1323], 'shape': [26, 4519]}}), ('IC0009W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4272465', 'shape': [934, 40]}, 'output': {'text': '以色列选手米宁科十四米七八创造国家纪录摘得银牌', 'tokenid': [48, 724, 429, 687, 241, 447, 1069, 1523, 27, 366, 447, 748, 777, 1299, 1855, 468, 417, 1694, 290, 3220, 471, 659, 1252], 'shape': [23, 4519]}}), ('IC0087W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:2815299', 'shape': [934, 40]}, 'output': {'text': '上海一大巴司机俯身捡手机致翻车六名乘客死亡', 'tokenid': [54, 448, 66, 217, 763, 929, 544, '<unk>', 701, 3062, 241, 544, 1054, 1815, 129, 283, 444, 1771, 1364, 502, 2024], 'shape': [21, 4519]}}), ('IC0089W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.18.ark:7895628', 'shape': [931, 40]}, 'output': {'text': '五组队员以二十四小时总跑量二万七千九百七十五点五三的成绩完美的结束了此次比赛', 'tokenid': [401, 911, 1060, 994, 48, 760, 27, 366, 7, 83, 878, 1021, 150, 760, 1569, 748, 1242, 419, 988, 748, 27, 401, 74, 401, 24, 20, 309, 1581, 37, 567, 20, 893, 894, 63, 1099, 1204, 268, 1180], 'shape': [38, 4519]}}), ('IC0002W0133', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1397741', 'shape': [930, 40]}, 'output': {'text': '其中包括对拥有一套住房并已结清相应购房贷款的家庭', 'tokenid': [1144, 347, 872, 1052, 935, 1825, 107, 66, 1866, 381, 1362, 1073, 1197, 893, 768, 831, 677, 111, 1362, 2975, 104, 20, 417, 1770], 'shape': [24, 4519]}})]]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('IC0007W0264', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:3152908', 'shape': [927, 40]}, 'output': {'text': 'CX六百采用的也并非是FIREFOX操作系统', 'tokenid': ['<unk>', '<unk>', 283, 988, 2394, 261, 20, 229, 1073, 959, 60, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 536, 547, 1009, 1072], 'shape': [22, 4519]}}), ('IC0085W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:193509', 'shape': [926, 40]}, 'output': {'text': '并成功实现了商流信息流资金流与物流的四流合一', 'tokenid': [1073, 309, 1100, 1207, 136, 63, 1510, 149, 369, 997, 149, 1563, 497, 149, 501, 132, 149, 20, 366, 149, 45, 66], 'shape': [22, 4519]}}), ('IC0082W0021', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.11.ark:5199439', 'shape': [924, 40]}, 'output': {'text': '幺九四九THEHIDDENCCITY三里屯店', 'tokenid': [1844, 419, 366, 419, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 24, 10, 2265, 106], 'shape': [22, 4519]}}), ('ID0042W0179', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:1680666', 'shape': [922, 40]}, 'output': {'text': '烦请把空调温度调到调大到二十六度烦请把空调温度调大到二十六度', 'tokenid': [349, 2, 92, 264, 160, 161, 139, 160, 75, 160, 217, 75, 760, 27, 283, 139, 349, 2, 92, 264, 160, 161, 139, 160, 217, 75, 760, 27, 283, 139], 'shape': [30, 4519]}}), ('IC0087W0350', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:312919', 'shape': [921, 40]}, 'output': {'text': '昆仑决周口站闫西波剑指帽子戏法搜狐体育', 'tokenid': [1773, 2007, 1366, 227, 944, 774, 304, 873, 1735, 1415, 134, 1040, 393, 662, 187, 15, 2159, 451, 1184], 'shape': [19, 4519]}}), ('IC0002W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:4645365', 'shape': [920, 40]}, 'output': {'text': '从智能手表到带嵌入式电子的健身手镯珠宝和衣服', 'tokenid': [371, 1768, 177, 241, 1116, 75, 494, 3939, 346, 821, 30, 393, 20, 563, 701, 241, 3698, 1791, 778, 232, 40, 41], 'shape': [22, 4519]}}), ('IC0080W0357', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3053894', 'shape': [918, 40]}, 'output': {'text': '不管是新科世锦冠军马龙还是直板第一人许昕', 'tokenid': [42, 897, 60, 184, 1523, 26, 1298, 1462, 773, 643, 1062, 146, 60, 1041, 1317, 254, 66, 72, 540, 2087], 'shape': [20, 4519]}}), ('IC0007W0247', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2759707', 'shape': [914, 40]}, 'output': {'text': '年底前全面实施儿童白血病等八个病种的大病保障', 'tokenid': [303, 1195, 931, 11, 216, 1207, 1178, 249, 1068, 1042, 1214, 843, 537, 777, 39, 843, 568, 20, 217, 843, 628, 1718], 'shape': [22, 4519]}}), ('IC0008W0241', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:2079482', 'shape': [914, 40]}, 'output': {'text': '将推动保险资产向更长久期和更低风险资产配置过渡', 'tokenid': [1611, 64, 323, 628, 629, 1563, 1300, 543, 557, 176, 1024, 196, 232, 557, 991, 145, 629, 1563, 1300, 1460, 58, 420, 1522], 'shape': [23, 4519]}}), ('ID0047W0515', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:7111197', 'shape': [914, 40]}, 'output': {'text': '新股民亏四零万很闹心吃饭时心不在焉被枣核卡喉', 'tokenid': [184, 950, 12, 2802, 366, 783, 1569, 244, 1013, 163, 853, 852, 83, 163, 42, 51, 3200, 392, 2610, 1562, 1007, 2359], 'shape': [22, 4519]}}), ('IC0003W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:2391636', 'shape': [912, 40]}, 'output': {'text': '房企跨界触网营销等实质上都是在打抢客户大战', 'tokenid': [1362, 3202, 1659, 1125, 2534, 651, 1942, 977, 537, 1207, 1926, 54, 173, 60, 51, 94, 2037, 1364, 168, 217, 622], 'shape': [21, 4519]}}), ('IC0003W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8453225', 'shape': [908, 40]}, 'output': {'text': '皇姑区人民法院公布了这起民事纠纷案件的判决结果', 'tokenid': [1456, 710, 396, 72, 12, 187, 515, 172, 985, 63, 9, 404, 12, 363, 2899, 2771, 566, 1105, 20, 1786, 1366, 893, 237], 'shape': [23, 4519]}}), ('IC0002W0412', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:6772348', 'shape': [904, 40]}, 'output': {'text': '她透露日前以大使身份到四川探访眼疾病童四日', 'tokenid': [886, 1554, 1491, 521, 931, 48, 217, 654, 701, 858, 75, 366, 1334, 1503, 2386, 256, 3402, 843, 1068, 366, 521], 'shape': [21, 4519]}}), ('IC0096W0376', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7521059', 'shape': [902, 40]}, 'output': {'text': '上海静安队的朱莹洁首先在跪射阶段处于落后状态', 'tokenid': [54, 448, 828, 720, 1060, 20, 1615, 1652, 1532, 207, 564, 51, 491, 2198, 1164, 518, 665, 1398, 717, 114, 1776, 679], 'shape': [22, 4519]}}), ('IC0009W0458', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5391553', 'shape': [900, 40]}, 'output': {'text': '六旬男给九零后小三买车房女孩与人暧昧被逼写承诺书', 'tokenid': [283, 2840, 995, 70, 419, 783, 114, 7, 24, 616, 129, 1362, 766, 866, 501, 72, 2249, 2250, 392, 2357, 861, 1121, 1122, 697], 'shape': [24, 4519]}}), ('IC0002W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7683440', 'shape': [899, 40]}, 'output': {'text': '妈妈带着鹏鹏前往顺义区首儿李桥儿童医院口腔科看病', 'tokenid': [939, 939, 494, 341, 2262, 2262, 931, 1675, 1050, 1399, 396, 207, 249, 414, 1293, 249, 1068, 841, 515, 944, 2490, 1523, 121, 843], 'shape': [24, 4519]}}), ('IC0085W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2714460', 'shape': [899, 40]}, 'output': {'text': '三门峡市湖滨区会兴镇东坡村段黄河河岸突然塌陷', 'tokenid': [24, 192, 1528, 22, 464, 2193, 396, 174, 1270, 819, 359, 1853, 496, 518, 489, 223, 223, 1763, 789, 408, 3589, 3354], 'shape': [22, 4519]}}), ('ID0047W0396', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4697510', 'shape': [898, 40]}, 'output': {'text': '欣赏到了奇幻魔术秀面部彩绘等精彩节目', 'tokenid': [689, 690, 75, 63, 895, 465, 713, 1032, 984, 216, 1081, 1599, 3297, 537, 1598, 1599, 433, 723], 'shape': [18, 4519]}}), ('IC0007W0373', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5497905', 'shape': [897, 40]}, 'output': {'text': '他在摩纳哥站投出了两二十二秒五六的个人最好成绩', 'tokenid': [194, 51, 1999, 2782, 891, 774, 49, 56, 63, 505, 760, 27, 760, 1613, 401, 283, 20, 39, 72, 208, 120, 309, 1581], 'shape': [23, 4519]}}), ('IC0007W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2066192', 'shape': [895, 40]}, 'output': {'text': '符合医保定点相关规定的非公立医疗机构才可以申请', 'tokenid': [1346, 45, 841, 628, 87, 74, 831, 179, 1769, 87, 20, 959, 172, 1369, 841, 3167, 544, 1594, 673, 47, 48, 2202, 2], 'shape': [23, 4519]}}), ('IC0007W0493', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:8098425', 'shape': [892, 40]}, 'output': {'text': '六岁娃处女膜破裂学校录像离奇被剪疑遭校长猥琐', 'tokenid': [283, 796, 1549, 665, 766, 2785, 1319, 1506, 238, 516, 290, 117, 899, 895, 392, 1728, 1258, 2600, 516, 176, 2079, 2080], 'shape': [22, 4519]}}), ('IC0008W0393', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4872258', 'shape': [892, 40]}, 'output': {'text': '沙特阿拉伯选手马马斯拉赫以四十三秒九三预获得预赛第一', 'tokenid': [765, 336, 373, 188, 1375, 687, 241, 643, 643, 1061, 188, 2404, 48, 366, 27, 24, 1613, 419, 24, 32, 1963, 471, 32, 1180, 254, 66], 'shape': [26, 4519]}}), ('IC0085W0305', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:157476', 'shape': [892, 40]}, 'output': {'text': '信E付将面向全行业用户提供上下游供应链服务新模式', 'tokenid': [369, '<unk>', 103, 1611, 216, 543, 11, 137, 575, 261, 168, 239, 2214, 54, 142, 661, 2214, 677, 1186, 41, 1018, 184, 820, 821], 'shape': [24, 4519]}}), ('ID0049W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.48.ark:4718586', 'shape': [891, 40]}, 'output': {'text': '郭喜闯Vs叶夫根叶夫根尼沃龙科夫罗斯', 'tokenid': [1126, 88, 1247, '<unk>', 702, 797, 1066, 219, 797, 1066, 219, 1608, 2796, 1062, 1523, 1066, 1127, 1061], 'shape': [18, 4519]}}), ('ID0043W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5606517', 'shape': [887, 40]}, 'output': {'text': '白马塔大桥是敦煌市确定的十大重点工程之一', 'tokenid': [1042, 643, 1930, 217, 1293, 60, 1785, 2049, 22, 1325, 87, 20, 27, 217, 573, 74, 776, 1590, 339, 66], 'shape': [20, 4519]}}), ('IC0098W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.27.ark:1824808', 'shape': [886, 40]}, 'output': {'text': '但对于六月份的成交数据也有一定刺激开发商掌握楼市五一和十一黄金周两个节点', 'tokenid': [552, 935, 1398, 283, 745, 858, 20, 309, 81, 1131, 1775, 229, 107, 66, 87, 1142, 793, 95, 570, 1510, 2073, 2296, 1294, 22, 401, 66, 232, 27, 66, 489, 497, 227, 505, 39, 433, 74], 'shape': [36, 4519]}}), ('IC0007W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:333006', 'shape': [885, 40]}, 'output': {'text': '而在今年十月底雅戈尔MAYOR馆杭州开业时', 'tokenid': [1044, 51, 267, 303, 27, 745, 1195, 498, 2812, 529, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 762, 580, 226, 95, 575, 83], 'shape': [21, 4519]}}), ('IC0095W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:8676976', 'shape': [885, 40]}, 'output': {'text': '上海宝马拖拽交警致死案司机翻供否认故意伤害', 'tokenid': [54, 448, 778, 643, 2179, 3532, 81, 949, 1054, 502, 566, 929, 544, 1815, 2214, 61, 199, 362, 439, 209, 1119], 'shape': [21, 4519]}}), ('IC0003W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7869014', 'shape': [882, 40]}, 'output': {'text': '五岁男童被罚喝二十杯水致肾积水回应只喝十次', 'tokenid': [401, 796, 995, 1068, 392, 2030, 870, 760, 27, 1638, 98, 1054, 3081, 1621, 98, 511, 677, 96, 870, 27, 1204], 'shape': [21, 4519]}}), ('IC0083W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:4534489', 'shape': [882, 40]}, 'output': {'text': '三轮车被拦截突然加速协警遭拖行数米多处受伤', 'tokenid': [24, 1030, 129, 392, 2330, 2875, 789, 408, 285, 62, 2369, 949, 2600, 2179, 137, 1131, 447, 147, 665, 504, 209], 'shape': [21, 4519]}}), ('ID0028W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.37.ark:4427378', 'shape': [882, 40]}, 'output': {'text': '本部长藤本松下将B', 'tokenid': [105, 1081, 176, 2167, 105, 354, 142, 1611, '<unk>'], 'shape': [9, 4519]}}), ('IC0009W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:6100369', 'shape': [879, 40]}, 'output': {'text': '今年六十来岁的王某化名是蚌埠市某公司管理人员', 'tokenid': [267, 303, 283, 27, 154, 796, 20, 595, 71, 1351, 444, 60, 3075, 3076, 22, 71, 172, 929, 897, 666, 72, 994], 'shape': [22, 4519]}})]]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('ID0041W0413', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5453061', 'shape': [849, 40]}, 'output': {'text': '中海集运二零一一年全年净污损达七亿元', 'tokenid': [347, 448, 725, 625, 760, 783, 66, 66, 303, 11, 303, 582, 2658, 1431, 752, 748, 2721, 1323], 'shape': [18, 4519]}}), ('IC0007W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6598833', 'shape': [848, 40]}, 'output': {'text': '有天坐巴士去赤柱以及去愉景湾参加朋友婚礼', 'tokenid': [107, 78, 128, 763, 788, 125, 533, 3101, 48, 435, 125, 1464, 266, 1917, 1326, 285, 477, 478, 838, 1411], 'shape': [20, 4519]}}), ('IC0091W0496', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.21.ark:1598749', 'shape': [848, 40]}, 'output': {'text': '上海国际和平妇幼保健院急诊科一名医生遭女患者殴打', 'tokenid': [54, 448, 468, 1166, 232, 1223, 300, 2204, 628, 563, 515, 455, 1677, 1523, 66, 444, 841, 25, 2600, 766, 2977, 281, 4393, 94], 'shape': [24, 4519]}}), ('IC0003W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8489898', 'shape': [847, 40]}, 'output': {'text': '五旬光棍相亲被骗红包持刀砍媒人获刑不愿赔偿', 'tokenid': [401, 2840, 452, 2277, 831, 615, 392, 1498, 457, 872, 1485, 1102, 2077, 2259, 72, 1963, 1233, 42, 1378, 2048, 3123], 'shape': [21, 4519]}}), ('ID0046W0427', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:4984794', 'shape': [845, 40]}, 'output': {'text': '从零岁的婴儿至旧会徽的设计者佐野研二郎均可以参加', 'tokenid': [371, 783, 796, 20, 2970, 249, 668, 2014, 174, 2000, 20, 683, 1305, 281, 1861, 485, 1918, 760, 2042, 671, 47, 48, 1326, 285], 'shape': [24, 4519]}}), ('IC0080W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5178300', 'shape': [844, 40]}, 'output': {'text': '三元桥启动换梁大修将在夜间占用三环进行施工', 'tokenid': [24, 1323, 1293, 1356, 323, 124, 827, 217, 698, 1611, 51, 696, 84, 1143, 261, 24, 402, 1566, 137, 1178, 776], 'shape': [21, 4519]}}), ('IC0080W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5269279', 'shape': [844, 40]}, 'output': {'text': '一艘渔船在长寿区长江川维码头附近触礁翻船', 'tokenid': [66, 2918, 3739, 1028, 51, 176, 1720, 396, 176, 356, 1334, 1843, 1189, 118, 18, 19, 2534, 3182, 1815, 1028], 'shape': [20, 4519]}}), ('IC0085W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2666234', 'shape': [842, 40]}, 'output': {'text': '三门峡泄洪村民冒险捞鱼被困警用直升机紧急救援', 'tokenid': [24, 192, 1528, 3428, 1416, 496, 12, 1296, 629, 3470, 932, 392, 1468, 949, 261, 1041, 403, 544, 1392, 455, 1347, 3814], 'shape': [22, 4519]}}), ('IC0096W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:8936948', 'shape': [841, 40]}, 'output': {'text': '上海小客车总量达三百二十万辆未来两年或增八十万辆', 'tokenid': [54, 448, 7, 1364, 129, 878, 150, 752, 24, 988, 760, 27, 1569, 1604, 1784, 154, 505, 303, 280, 1836, 777, 27, 1569, 1604], 'shape': [24, 4519]}}), ('IC0096W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9105539', 'shape': [841, 40]}, 'output': {'text': '因为前晚杨浦区市场监督管理局的一纸封条', 'tokenid': [377, 200, 931, 73, 337, 2013, 396, 22, 545, 2050, 2051, 897, 666, 1457, 20, 66, 2122, 1716, 1016], 'shape': [19, 4519]}}), ('IC0006W0160', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:1065543', 'shape': [840, 40]}, 'output': {'text': '本公司的房地产开发及进出口贸易业务将被全部剥离', 'tokenid': [105, 172, 929, 20, 1362, 52, 1300, 95, 570, 435, 1566, 56, 944, 2643, 82, 575, 1018, 1611, 392, 11, 1081, 3671, 899], 'shape': [23, 4519]}}), ('IC0080W0453', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5001869', 'shape': [840, 40]}, 'output': {'text': '三人网购他人身份证办信用卡卖四十多张获利万元', 'tokenid': [24, 72, 651, 111, 194, 72, 701, 858, 859, 165, 369, 261, 1007, 397, 366, 27, 147, 474, 1963, 526, 1569, 1323], 'shape': [22, 4519]}}), ('IC0002W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1664231', 'shape': [839, 40]}, 'output': {'text': '也通过大幅度调整对普通住宅总价认定标准的方式', 'tokenid': [229, 178, 420, 217, 3133, 139, 160, 658, 935, 971, 178, 381, 1597, 878, 399, 199, 87, 55, 35, 20, 292, 821], 'shape': [22, 4519]}}), ('IC0096W0325', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6469976', 'shape': [839, 40]}, 'output': {'text': '各地要将充电基础设施配套电网建设与改造项目', 'tokenid': [1115, 52, 99, 1611, 411, 30, 228, 2280, 683, 1178, 1460, 1866, 30, 651, 917, 683, 501, 630, 1855, 2447, 723], 'shape': [21, 4519]}}), ('ID0046W0472', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5820679', 'shape': [839, 40]}, 'output': {'text': '继续实施农业种子种苗种畜种禽免税进口优惠政策', 'tokenid': [2137, 1856, 1207, 1178, 1065, 575, 568, 393, 568, 1540, 568, 3616, 568, 3453, 890, 2064, 1566, 944, 112, 113, 2203, 2730], 'shape': [22, 4519]}}), ('IC0009W0395', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4089674', 'shape': [838, 40]}, 'output': {'text': '肯尼亚队以二金二银二铜在金牌榜和奖牌榜上暂时领跑', 'tokenid': [1534, 1608, 527, 1060, 48, 760, 497, 760, 659, 760, 1059, 51, 497, 1252, 1077, 232, 1173, 1252, 1077, 54, 1499, 83, 909, 1021], 'shape': [24, 4519]}}), ('IC0080W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4862071', 'shape': [838, 40]}, 'output': {'text': '丰台检察院以诈骗罪对胡某路某吴某提起公诉', 'tokenid': [1437, 67, 1568, 2256, 515, 48, 3373, 1498, 506, 935, 814, 71, 327, 71, 331, 71, 239, 404, 172, 260], 'shape': [20, 4519]}}), ('ID0043W0330', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3779769', 'shape': [838, 40]}, 'output': {'text': '五亿九千九百五十二万五千零九十五点五一', 'tokenid': [401, 2721, 419, 1242, 419, 988, 401, 27, 760, 1569, 401, 1242, 783, 419, 27, 401, 74, 401, 66], 'shape': [19, 4519]}}), ('IC0085W0283', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8597211', 'shape': [837, 40]}, 'output': {'text': '该市场在二零三零年之前规模将达到七百亿美元之巨', 'tokenid': [569, 22, 545, 51, 760, 783, 24, 783, 303, 339, 931, 1769, 820, 1611, 752, 75, 748, 988, 2721, 567, 1323, 339, 2541], 'shape': [23, 4519]}}), ('IC0002W0435', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7209787', 'shape': [836, 40]}, 'output': {'text': '五岁智障儿走失三百天母亲曾放弃寻找悔恨不已', 'tokenid': [401, 796, 1768, 1718, 249, 520, 716, 24, 988, 78, 2078, 615, 1935, 6, 1682, 1070, 388, 2787, 901, 42, 1197], 'shape': [21, 4519]}}), ('IC0080W0356', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3020101', 'shape': [836, 40]}, 'output': {'text': '那就是以三十二的微弱劣势不敌山东鲁能的老将张超', 'tokenid': [245, 220, 60, 48, 24, 27, 760, 20, 719, 846, 2609, 1551, 42, 2286, 523, 359, 1799, 177, 20, 282, 1611, 474, 21], 'shape': [23, 4519]}}), ('ID0023W0477', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:5542934', 'shape': [836, 40]}, 'output': {'text': '正式对外宣布队长惠若琪因身体原因将缺席前半个赛季', 'tokenid': [250, 821, 935, 215, 663, 985, 1060, 176, 113, 584, 1221, 377, 701, 451, 364, 377, 1611, 925, 2028, 931, 1170, 39, 1180, 1383], 'shape': [24, 4519]}}), ('IC0002W0362', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5797738', 'shape': [835, 40]}, 'output': {'text': '有望首先令杭州以及浙江当地的基建上市公司获益', 'tokenid': [107, 352, 207, 564, 2435, 580, 226, 48, 435, 2008, 356, 472, 52, 20, 228, 917, 54, 22, 172, 929, 1963, 2279], 'shape': [22, 4519]}}), ('IC0009W0232', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:976375', 'shape': [835, 40]}, 'output': {'text': '国际社会倾向于用完全不同于美国的眼光来看待中国', 'tokenid': [468, 1166, 1572, 174, 314, 543, 1398, 261, 37, 11, 42, 426, 1398, 567, 468, 20, 256, 452, 154, 121, 197, 347, 468], 'shape': [23, 4519]}}), ('IC0009W0375', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3696974', 'shape': [834, 40]}, 'output': {'text': '每支队伍派二名队员完成约八十米高的速降', 'tokenid': [469, 1168, 1060, 779, 753, 760, 444, 1060, 994, 37, 309, 686, 777, 27, 447, 246, 20, 62, 591], 'shape': [19, 4519]}}), ('ID0041W0488', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7110456', 'shape': [834, 40]}, 'output': {'text': '跑者应该有耐心循环渐进的延长跑步距离', 'tokenid': [1021, 281, 677, 569, 107, 1393, 163, 2469, 402, 1830, 1566, 20, 1158, 176, 1021, 345, 167, 899], 'shape': [18, 4519]}}), ('ID0043W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:1378790', 'shape': [834, 40]}, 'output': {'text': '我想要将AM设到一零六点九兆赫', 'tokenid': [17, 130, 99, 1611, '<unk>', '<unk>', 683, 75, 66, 783, 283, 74, 419, 2955, 2404], 'shape': [15, 4519]}}), ('ID0042W0474', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6176382', 'shape': [832, 40]}, 'output': {'text': '沈某一个月四次进入宗某家行窃重庆晚报讯家中被偷', 'tokenid': [1610, 71, 66, 39, 745, 366, 1204, 1566, 346, 597, 71, 417, 137, 3861, 573, 834, 73, 326, 1992, 417, 347, 392, 1344], 'shape': [23, 4519]}}), ('IC0007W0451', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7066999', 'shape': [831, 40]}, 'output': {'text': '宿迁市宿城公警官破获一冒充军官跨省诈骗团伙', 'tokenid': [1283, 2508, 22, 1283, 480, 172, 949, 1234, 1319, 1963, 66, 1296, 411, 773, 1234, 1659, 514, 3373, 1498, 110, 684], 'shape': [21, 4519]}}), ('ID0040W0499', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.39.ark:6918320', 'shape': [831, 40]}, 'output': {'text': '顾客消费签帐达一定金额可捐款公益金换购雨伞与雨衣', 'tokenid': [822, 1364, 1012, 1005, 1959, 2119, 752, 66, 87, 497, 170, 47, 4094, 104, 172, 2279, 497, 124, 111, 218, 1445, 501, 218, 40], 'shape': [24, 4519]}}), ('IC0080W0434', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4581482', 'shape': [830, 40]}, 'output': {'text': '三亚海滩中秋夜垃圾八十吨台媒十米一个垃圾桶', 'tokenid': [24, 527, 448, 2109, 347, 1424, 696, 1330, 1331, 777, 27, 2853, 67, 2259, 27, 447, 66, 39, 1330, 1331, 1764], 'shape': [21, 4519]}}), ('IC0080W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5823210', 'shape': [830, 40]}, 'output': {'text': '三名九零后酒后殴打出租司机致死逃窜三年被抓获', 'tokenid': [24, 444, 419, 783, 114, 871, 114, 4393, 94, 56, 2125, 929, 544, 1054, 502, 2023, 3997, 24, 303, 392, 1955, 1963], 'shape': [22, 4519]}})]]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------\n",
      "\n",
      "-----------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0007W0264', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:3152908', 'shape': [927, 40]}, 'output': {'text': 'CX六百采用的也并非是FIREFOX操作系统', 'tokenid': ['<unk>', '<unk>', 283, 988, 2394, 261, 20, 229, 1073, 959, 60, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 536, 547, 1009, 1072], 'shape': [22, 4519]}}), ('IC0085W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:193509', 'shape': [926, 40]}, 'output': {'text': '并成功实现了商流信息流资金流与物流的四流合一', 'tokenid': [1073, 309, 1100, 1207, 136, 63, 1510, 149, 369, 997, 149, 1563, 497, 149, 501, 132, 149, 20, 366, 149, 45, 66], 'shape': [22, 4519]}}), ('IC0082W0021', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.11.ark:5199439', 'shape': [924, 40]}, 'output': {'text': '幺九四九THEHIDDENCCITY三里屯店', 'tokenid': [1844, 419, 366, 419, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 24, 10, 2265, 106], 'shape': [22, 4519]}}), ('ID0042W0179', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:1680666', 'shape': [922, 40]}, 'output': {'text': '烦请把空调温度调到调大到二十六度烦请把空调温度调大到二十六度', 'tokenid': [349, 2, 92, 264, 160, 161, 139, 160, 75, 160, 217, 75, 760, 27, 283, 139, 349, 2, 92, 264, 160, 161, 139, 160, 217, 75, 760, 27, 283, 139], 'shape': [30, 4519]}}), ('IC0087W0350', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:312919', 'shape': [921, 40]}, 'output': {'text': '昆仑决周口站闫西波剑指帽子戏法搜狐体育', 'tokenid': [1773, 2007, 1366, 227, 944, 774, 304, 873, 1735, 1415, 134, 1040, 393, 662, 187, 15, 2159, 451, 1184], 'shape': [19, 4519]}}), ('IC0002W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:4645365', 'shape': [920, 40]}, 'output': {'text': '从智能手表到带嵌入式电子的健身手镯珠宝和衣服', 'tokenid': [371, 1768, 177, 241, 1116, 75, 494, 3939, 346, 821, 30, 393, 20, 563, 701, 241, 3698, 1791, 778, 232, 40, 41], 'shape': [22, 4519]}}), ('IC0080W0357', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3053894', 'shape': [918, 40]}, 'output': {'text': '不管是新科世锦冠军马龙还是直板第一人许昕', 'tokenid': [42, 897, 60, 184, 1523, 26, 1298, 1462, 773, 643, 1062, 146, 60, 1041, 1317, 254, 66, 72, 540, 2087], 'shape': [20, 4519]}}), ('IC0007W0247', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2759707', 'shape': [914, 40]}, 'output': {'text': '年底前全面实施儿童白血病等八个病种的大病保障', 'tokenid': [303, 1195, 931, 11, 216, 1207, 1178, 249, 1068, 1042, 1214, 843, 537, 777, 39, 843, 568, 20, 217, 843, 628, 1718], 'shape': [22, 4519]}}), ('IC0008W0241', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:2079482', 'shape': [914, 40]}, 'output': {'text': '将推动保险资产向更长久期和更低风险资产配置过渡', 'tokenid': [1611, 64, 323, 628, 629, 1563, 1300, 543, 557, 176, 1024, 196, 232, 557, 991, 145, 629, 1563, 1300, 1460, 58, 420, 1522], 'shape': [23, 4519]}}), ('ID0047W0515', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:7111197', 'shape': [914, 40]}, 'output': {'text': '新股民亏四零万很闹心吃饭时心不在焉被枣核卡喉', 'tokenid': [184, 950, 12, 2802, 366, 783, 1569, 244, 1013, 163, 853, 852, 83, 163, 42, 51, 3200, 392, 2610, 1562, 1007, 2359], 'shape': [22, 4519]}}), ('IC0003W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:2391636', 'shape': [912, 40]}, 'output': {'text': '房企跨界触网营销等实质上都是在打抢客户大战', 'tokenid': [1362, 3202, 1659, 1125, 2534, 651, 1942, 977, 537, 1207, 1926, 54, 173, 60, 51, 94, 2037, 1364, 168, 217, 622], 'shape': [21, 4519]}}), ('IC0003W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8453225', 'shape': [908, 40]}, 'output': {'text': '皇姑区人民法院公布了这起民事纠纷案件的判决结果', 'tokenid': [1456, 710, 396, 72, 12, 187, 515, 172, 985, 63, 9, 404, 12, 363, 2899, 2771, 566, 1105, 20, 1786, 1366, 893, 237], 'shape': [23, 4519]}}), ('IC0002W0412', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:6772348', 'shape': [904, 40]}, 'output': {'text': '她透露日前以大使身份到四川探访眼疾病童四日', 'tokenid': [886, 1554, 1491, 521, 931, 48, 217, 654, 701, 858, 75, 366, 1334, 1503, 2386, 256, 3402, 843, 1068, 366, 521], 'shape': [21, 4519]}}), ('IC0096W0376', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7521059', 'shape': [902, 40]}, 'output': {'text': '上海静安队的朱莹洁首先在跪射阶段处于落后状态', 'tokenid': [54, 448, 828, 720, 1060, 20, 1615, 1652, 1532, 207, 564, 51, 491, 2198, 1164, 518, 665, 1398, 717, 114, 1776, 679], 'shape': [22, 4519]}}), ('IC0009W0458', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5391553', 'shape': [900, 40]}, 'output': {'text': '六旬男给九零后小三买车房女孩与人暧昧被逼写承诺书', 'tokenid': [283, 2840, 995, 70, 419, 783, 114, 7, 24, 616, 129, 1362, 766, 866, 501, 72, 2249, 2250, 392, 2357, 861, 1121, 1122, 697], 'shape': [24, 4519]}}), ('IC0002W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7683440', 'shape': [899, 40]}, 'output': {'text': '妈妈带着鹏鹏前往顺义区首儿李桥儿童医院口腔科看病', 'tokenid': [939, 939, 494, 341, 2262, 2262, 931, 1675, 1050, 1399, 396, 207, 249, 414, 1293, 249, 1068, 841, 515, 944, 2490, 1523, 121, 843], 'shape': [24, 4519]}}), ('IC0085W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2714460', 'shape': [899, 40]}, 'output': {'text': '三门峡市湖滨区会兴镇东坡村段黄河河岸突然塌陷', 'tokenid': [24, 192, 1528, 22, 464, 2193, 396, 174, 1270, 819, 359, 1853, 496, 518, 489, 223, 223, 1763, 789, 408, 3589, 3354], 'shape': [22, 4519]}}), ('ID0047W0396', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4697510', 'shape': [898, 40]}, 'output': {'text': '欣赏到了奇幻魔术秀面部彩绘等精彩节目', 'tokenid': [689, 690, 75, 63, 895, 465, 713, 1032, 984, 216, 1081, 1599, 3297, 537, 1598, 1599, 433, 723], 'shape': [18, 4519]}}), ('IC0007W0373', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5497905', 'shape': [897, 40]}, 'output': {'text': '他在摩纳哥站投出了两二十二秒五六的个人最好成绩', 'tokenid': [194, 51, 1999, 2782, 891, 774, 49, 56, 63, 505, 760, 27, 760, 1613, 401, 283, 20, 39, 72, 208, 120, 309, 1581], 'shape': [23, 4519]}}), ('IC0007W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2066192', 'shape': [895, 40]}, 'output': {'text': '符合医保定点相关规定的非公立医疗机构才可以申请', 'tokenid': [1346, 45, 841, 628, 87, 74, 831, 179, 1769, 87, 20, 959, 172, 1369, 841, 3167, 544, 1594, 673, 47, 48, 2202, 2], 'shape': [23, 4519]}}), ('IC0007W0493', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:8098425', 'shape': [892, 40]}, 'output': {'text': '六岁娃处女膜破裂学校录像离奇被剪疑遭校长猥琐', 'tokenid': [283, 796, 1549, 665, 766, 2785, 1319, 1506, 238, 516, 290, 117, 899, 895, 392, 1728, 1258, 2600, 516, 176, 2079, 2080], 'shape': [22, 4519]}}), ('IC0008W0393', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4872258', 'shape': [892, 40]}, 'output': {'text': '沙特阿拉伯选手马马斯拉赫以四十三秒九三预获得预赛第一', 'tokenid': [765, 336, 373, 188, 1375, 687, 241, 643, 643, 1061, 188, 2404, 48, 366, 27, 24, 1613, 419, 24, 32, 1963, 471, 32, 1180, 254, 66], 'shape': [26, 4519]}}), ('IC0085W0305', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:157476', 'shape': [892, 40]}, 'output': {'text': '信E付将面向全行业用户提供上下游供应链服务新模式', 'tokenid': [369, '<unk>', 103, 1611, 216, 543, 11, 137, 575, 261, 168, 239, 2214, 54, 142, 661, 2214, 677, 1186, 41, 1018, 184, 820, 821], 'shape': [24, 4519]}}), ('ID0049W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.48.ark:4718586', 'shape': [891, 40]}, 'output': {'text': '郭喜闯Vs叶夫根叶夫根尼沃龙科夫罗斯', 'tokenid': [1126, 88, 1247, '<unk>', 702, 797, 1066, 219, 797, 1066, 219, 1608, 2796, 1062, 1523, 1066, 1127, 1061], 'shape': [18, 4519]}}), ('ID0043W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5606517', 'shape': [887, 40]}, 'output': {'text': '白马塔大桥是敦煌市确定的十大重点工程之一', 'tokenid': [1042, 643, 1930, 217, 1293, 60, 1785, 2049, 22, 1325, 87, 20, 27, 217, 573, 74, 776, 1590, 339, 66], 'shape': [20, 4519]}}), ('IC0098W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.27.ark:1824808', 'shape': [886, 40]}, 'output': {'text': '但对于六月份的成交数据也有一定刺激开发商掌握楼市五一和十一黄金周两个节点', 'tokenid': [552, 935, 1398, 283, 745, 858, 20, 309, 81, 1131, 1775, 229, 107, 66, 87, 1142, 793, 95, 570, 1510, 2073, 2296, 1294, 22, 401, 66, 232, 27, 66, 489, 497, 227, 505, 39, 433, 74], 'shape': [36, 4519]}}), ('IC0007W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:333006', 'shape': [885, 40]}, 'output': {'text': '而在今年十月底雅戈尔MAYOR馆杭州开业时', 'tokenid': [1044, 51, 267, 303, 27, 745, 1195, 498, 2812, 529, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 762, 580, 226, 95, 575, 83], 'shape': [21, 4519]}}), ('IC0095W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:8676976', 'shape': [885, 40]}, 'output': {'text': '上海宝马拖拽交警致死案司机翻供否认故意伤害', 'tokenid': [54, 448, 778, 643, 2179, 3532, 81, 949, 1054, 502, 566, 929, 544, 1815, 2214, 61, 199, 362, 439, 209, 1119], 'shape': [21, 4519]}}), ('IC0003W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7869014', 'shape': [882, 40]}, 'output': {'text': '五岁男童被罚喝二十杯水致肾积水回应只喝十次', 'tokenid': [401, 796, 995, 1068, 392, 2030, 870, 760, 27, 1638, 98, 1054, 3081, 1621, 98, 511, 677, 96, 870, 27, 1204], 'shape': [21, 4519]}}), ('IC0083W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:4534489', 'shape': [882, 40]}, 'output': {'text': '三轮车被拦截突然加速协警遭拖行数米多处受伤', 'tokenid': [24, 1030, 129, 392, 2330, 2875, 789, 408, 285, 62, 2369, 949, 2600, 2179, 137, 1131, 447, 147, 665, 504, 209], 'shape': [21, 4519]}}), ('ID0028W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.37.ark:4427378', 'shape': [882, 40]}, 'output': {'text': '本部长藤本松下将B', 'tokenid': [105, 1081, 176, 2167, 105, 354, 142, 1611, '<unk>'], 'shape': [9, 4519]}}), ('IC0009W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:6100369', 'shape': [879, 40]}, 'output': {'text': '今年六十来岁的王某化名是蚌埠市某公司管理人员', 'tokenid': [267, 303, 283, 27, 154, 796, 20, 595, 71, 1351, 444, 60, 3075, 3076, 22, 71, 172, 929, 897, 666, 72, 994], 'shape': [22, 4519]}})][('IC0096W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:5912144', 'shape': [1325, 40]}, 'output': {'text': '就是加入了P点对点对等网络传输的更新方式', 'tokenid': [220, 60, 285, 346, 63, '<unk>', 74, 935, 74, 935, 537, 651, 652, 664, 1746, 20, 557, 184, 292, 821], 'shape': [20, 4519]}}), ('IC0094W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:36078', 'shape': [1166, 40]}, 'output': {'text': '王辰澎湃资料八月十八日下午二点二十分左右在运行时梯街突然跳起', 'tokenid': [595, 1150, 2890, 3877, 1563, 1564, 777, 745, 27, 777, 521, 142, 966, 760, 74, 760, 27, 421, 1824, 2063, 51, 625, 137, 83, 1309, 127, 789, 408, 1635, 404], 'shape': [30, 4519]}}), ('ID0042W0471', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6090243', 'shape': [1146, 40]}, 'output': {'text': '但作为深圳国资委旗下最大的房产房地产但作为深圳国资委旗下最大的房地产公司', 'tokenid': [552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 1300, 1362, 52, 1300, 552, 547, 200, 695, 1193, 468, 1563, 1721, 1890, 142, 208, 217, 20, 1362, 52, 1300, 172, 929], 'shape': [36, 4519]}}), ('IC0007W0369', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5394213', 'shape': [1101, 40]}, 'output': {'text': '他的主要对手是美国是田径锦赛锦标赛的金牌得主乔科斯瓦斯', 'tokenid': [194, 20, 824, 99, 935, 241, 60, 567, 468, 60, 484, 2219, 1298, 1180, 1298, 55, 1180, 20, 497, 1252, 471, 824, 624, 1523, 1061, 914, 1061], 'shape': [27, 4519]}}), ('IC0080W0192', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9525870', 'shape': [1059, 40]}, 'output': {'text': '积极稳妥探索水利建设贷款等涉农贷款资产证券化试点', 'tokenid': [1621, 1089, 1737, 2257, 1503, 16, 98, 526, 917, 683, 2975, 104, 537, 1743, 1065, 2975, 104, 1563, 1300, 859, 952, 1351, 930, 74], 'shape': [24, 4519]}}), ('IC0003W0027', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8932830', 'shape': [1057, 40]}, 'output': {'text': '万达广场中共北京市石景山区委员会东', 'tokenid': [1569, 752, 365, 545, 347, 503, 224, 360, 22, 912, 266, 523, 396, 1721, 994, 174, 359], 'shape': [17, 4519]}}), ('IC0003W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9076930', 'shape': [1043, 40]}, 'output': {'text': '五种高价中药材价格普降超两成一半品种都在下跌', 'tokenid': [401, 568, 246, 399, 347, 876, 1605, 399, 400, 971, 591, 21, 505, 309, 66, 1170, 490, 568, 173, 51, 142, 2462], 'shape': [22, 4519]}}), ('IC0095W0392', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:7196508', 'shape': [1032, 40]}, 'output': {'text': '拿过上海市五一杯长跑比赛冠军杨浦区迎春长跑冠军', 'tokenid': [933, 420, 54, 448, 22, 401, 66, 1638, 176, 1021, 268, 1180, 1462, 773, 337, 2013, 396, 2342, 647, 176, 1021, 1462, 773], 'shape': [23, 4519]}}), ('IC0003W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8547324', 'shape': [1029, 40]}, 'output': {'text': '五村官私分七二一救灾款获刑获刑两年半至三年', 'tokenid': [401, 496, 1234, 1589, 421, 748, 760, 66, 1347, 3517, 104, 1963, 1233, 1963, 1233, 505, 303, 1170, 668, 24, 303], 'shape': [21, 4519]}}), ('IC0080W0194', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9594096', 'shape': [1029, 40]}, 'output': {'text': '建立金融支持水利改革发展的风险分散和政策保障机制', 'tokenid': [917, 1369, 497, 2478, 1168, 1485, 98, 526, 630, 1592, 570, 2053, 20, 145, 629, 421, 1379, 232, 2203, 2730, 628, 1718, 544, 278], 'shape': [24, 4519]}}), ('ID0043W0445', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5565524', 'shape': [1016, 40]}, 'output': {'text': '三盗墓者酒后捅伤的哥一人背部文有钟馗图案', 'tokenid': [24, 1765, 2016, 281, 871, 114, 3525, 209, 20, 891, 66, 72, 265, 1081, 772, 107, 1014, 4268, 53, 566], 'shape': [20, 4519]}}), ('ID0021W0389', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.30.ark:3999585', 'shape': [1012, 40]}, 'output': {'text': '二零一四财年收入下降四点百分之八至二二七八百六五十二亿日元', 'tokenid': [760, 783, 66, 366, 1715, 303, 131, 346, 142, 591, 366, 74, 988, 421, 339, 777, 668, 760, 760, 748, 777, 988, 283, 401, 27, 760, 2721, 521, 1323], 'shape': [29, 4519]}}), ('IC0003W0352', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:6187541', 'shape': [1009, 40]}, 'output': {'text': '它们分别是布达佩斯汉堡洛杉矶巴黎和罗马', 'tokenid': [262, 198, 421, 279, 60, 985, 752, 1376, 1061, 355, 2648, 273, 2015, 3882, 763, 1228, 232, 1127, 643], 'shape': [19, 4519]}}), ('IC0007W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:5268808', 'shape': [1005, 40]}, 'output': {'text': '二零一三年世锦赛铜牌的得主卢克斯梅里赫也不容忽视', 'tokenid': [760, 783, 66, 24, 303, 26, 1298, 1180, 1059, 1252, 20, 471, 824, 1138, 317, 1061, 835, 10, 2404, 229, 42, 603, 1923, 93], 'shape': [24, 4519]}}), ('IC0006W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:7056553', 'shape': [1001, 40]}, 'output': {'text': '六十九岁独腿环卫工拄拐杖扫大街七年半小时扫三百米', 'tokenid': [283, 27, 419, 796, 335, 1995, 402, 823, 776, '<unk>', 2424, 3955, 1363, 217, 127, 748, 303, 1170, 7, 83, 1363, 24, 988, 447], 'shape': [24, 4519]}}), ('ID0043W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5971158', 'shape': [985, 40]}, 'output': {'text': '香港航空HX三零四飞往北京的航班延误九小时', 'tokenid': [1113, 1135, 86, 264, '<unk>', '<unk>', 24, 783, 366, 406, 1675, 224, 360, 20, 86, 143, 1158, 1159, 419, 7, 83], 'shape': [21, 4519]}}), ('IC0096W0323', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6414270', 'shape': [980, 40]}, 'output': {'text': '每二千辆电动汽车至少配套建设一座公交充电站', 'tokenid': [469, 760, 1242, 1604, 30, 323, 1306, 129, 668, 148, 1460, 1866, 917, 683, 66, 69, 172, 81, 411, 30, 774], 'shape': [21, 4519]}}), ('IC0009W0387', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3931850', 'shape': [966, 40]}, 'output': {'text': '山海关区旅游局山海关区体育局以及各相关单位承办', 'tokenid': [523, 448, 179, 396, 1571, 661, 1457, 523, 448, 179, 396, 451, 1184, 1457, 48, 435, 1115, 831, 179, 159, 57, 1121, 165], 'shape': [23, 4519]}}), ('IC0007W0478', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7739130', 'shape': [962, 40]}, 'output': {'text': '六岁女孩肺功能衰竭如八旬老人病因成谜呼气吃力', 'tokenid': [283, 796, 766, 866, 1588, 1100, 177, 2870, 2700, 236, 777, 2840, 282, 72, 843, 377, 309, 3038, 1110, 79, 853, 270], 'shape': [22, 4519]}}), ('IC0002W0363', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5831491', 'shape': [959, 40]}, 'output': {'text': '精工钢构亚厦股份杭钢股份等值得关注', 'tokenid': [1598, 776, 440, 1594, 527, 1846, 950, 858, 580, 440, 950, 858, 537, 883, 471, 179, 438], 'shape': [17, 4519]}}), ('ID0043W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3286257', 'shape': [959, 40]}, 'output': {'text': '九亿五千九百七十四万两千四百零五点一三', 'tokenid': [419, 2721, 401, 1242, 419, 988, 748, 27, 366, 1569, 505, 1242, 366, 988, 783, 401, 74, 66, 24], 'shape': [19, 4519]}}), ('IC0009W0414', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4539381', 'shape': [955, 40]}, 'output': {'text': '搜狐娱乐讯刘嘉玲日前戴上半亿珠宝现身北京出席活动', 'tokenid': [15, 2159, 183, 157, 1992, 583, 1108, 1924, 521, 931, 1849, 54, 1170, 2721, 1791, 778, 136, 701, 224, 360, 56, 2028, 1584, 323], 'shape': [24, 4519]}}), ('ID0041W0470', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:6703502', 'shape': [955, 40]}, 'output': {'text': '而六月居冠的天河北成交宗数下近百分之二', 'tokenid': [1044, 283, 745, 1263, 1462, 20, 78, 223, 224, 309, 81, 597, 1131, 142, 19, 988, 421, 339, 760], 'shape': [19, 4519]}}), ('IC0009W0468', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5629083', 'shape': [954, 40]}, 'output': {'text': '六十五岁的通许县人潘安在郑州市做学生课桌椅加工生意', 'tokenid': [283, 27, 401, 796, 20, 178, 540, 307, 72, 2111, 720, 51, 201, 226, 22, 263, 238, 25, 353, 1284, 1975, 285, 776, 25, 439], 'shape': [25, 4519]}}), ('IC0007W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7463127', 'shape': [952, 40]}, 'output': {'text': '他们结伴去位于觐州奉化交界的卖柴岳徒步', 'tokenid': [194, 198, 893, 409, 125, 57, 1398, '<unk>', 226, 2944, 1351, 81, 1125, 20, 397, 1118, 1501, 1905, 345], 'shape': [19, 4519]}}), ('IC0096W0122', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2248237', 'shape': [950, 40]}, 'output': {'text': '不仅是北京上海广州等一线城市地块溢价率在走高', 'tokenid': [42, 1891, 60, 224, 360, 54, 448, 365, 226, 537, 66, 284, 480, 22, 52, 907, 1989, 399, 1593, 51, 520, 246], 'shape': [22, 4519]}}), ('ID0046W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5373814', 'shape': [947, 40]}, 'output': {'text': '自自己只是把二传手们喜欢的两次吊球变成两次进攻而已', 'tokenid': [275, 275, 276, 96, 60, 92, 760, 664, 241, 198, 88, 89, 20, 505, 1204, 3191, 1039, 730, 309, 505, 1204, 1566, 845, 1044, 1197], 'shape': [25, 4519]}}), ('IC0009W0166', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8596778', 'shape': [934, 40]}, 'output': {'text': '同比增长百分之二十一点七净资产均值为二百零二点三亿元', 'tokenid': [426, 268, 1836, 176, 988, 421, 339, 760, 27, 66, 74, 748, 582, 1563, 1300, 671, 883, 200, 760, 988, 783, 760, 74, 24, 2721, 1323], 'shape': [26, 4519]}}), ('IC0009W0402', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4272465', 'shape': [934, 40]}, 'output': {'text': '以色列选手米宁科十四米七八创造国家纪录摘得银牌', 'tokenid': [48, 724, 429, 687, 241, 447, 1069, 1523, 27, 366, 447, 748, 777, 1299, 1855, 468, 417, 1694, 290, 3220, 471, 659, 1252], 'shape': [23, 4519]}}), ('IC0087W0491', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.17.ark:2815299', 'shape': [934, 40]}, 'output': {'text': '上海一大巴司机俯身捡手机致翻车六名乘客死亡', 'tokenid': [54, 448, 66, 217, 763, 929, 544, '<unk>', 701, 3062, 241, 544, 1054, 1815, 129, 283, 444, 1771, 1364, 502, 2024], 'shape': [21, 4519]}}), ('IC0089W0364', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.18.ark:7895628', 'shape': [931, 40]}, 'output': {'text': '五组队员以二十四小时总跑量二万七千九百七十五点五三的成绩完美的结束了此次比赛', 'tokenid': [401, 911, 1060, 994, 48, 760, 27, 366, 7, 83, 878, 1021, 150, 760, 1569, 748, 1242, 419, 988, 748, 27, 401, 74, 401, 24, 20, 309, 1581, 37, 567, 20, 893, 894, 63, 1099, 1204, 268, 1180], 'shape': [38, 4519]}}), ('IC0002W0133', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1397741', 'shape': [930, 40]}, 'output': {'text': '其中包括对拥有一套住房并已结清相应购房贷款的家庭', 'tokenid': [1144, 347, 872, 1052, 935, 1825, 107, 66, 1866, 381, 1362, 1073, 1197, 893, 768, 831, 677, 111, 1362, 2975, 104, 20, 417, 1770], 'shape': [24, 4519]}})]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------[('ID0041W0413', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5453061', 'shape': [849, 40]}, 'output': {'text': '中海集运二零一一年全年净污损达七亿元', 'tokenid': [347, 448, 725, 625, 760, 783, 66, 66, 303, 11, 303, 582, 2658, 1431, 752, 748, 2721, 1323], 'shape': [18, 4519]}}), ('IC0007W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6598833', 'shape': [848, 40]}, 'output': {'text': '有天坐巴士去赤柱以及去愉景湾参加朋友婚礼', 'tokenid': [107, 78, 128, 763, 788, 125, 533, 3101, 48, 435, 125, 1464, 266, 1917, 1326, 285, 477, 478, 838, 1411], 'shape': [20, 4519]}}), ('IC0091W0496', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.21.ark:1598749', 'shape': [848, 40]}, 'output': {'text': '上海国际和平妇幼保健院急诊科一名医生遭女患者殴打', 'tokenid': [54, 448, 468, 1166, 232, 1223, 300, 2204, 628, 563, 515, 455, 1677, 1523, 66, 444, 841, 25, 2600, 766, 2977, 281, 4393, 94], 'shape': [24, 4519]}}), ('IC0003W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8489898', 'shape': [847, 40]}, 'output': {'text': '五旬光棍相亲被骗红包持刀砍媒人获刑不愿赔偿', 'tokenid': [401, 2840, 452, 2277, 831, 615, 392, 1498, 457, 872, 1485, 1102, 2077, 2259, 72, 1963, 1233, 42, 1378, 2048, 3123], 'shape': [21, 4519]}}), ('ID0046W0427', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:4984794', 'shape': [845, 40]}, 'output': {'text': '从零岁的婴儿至旧会徽的设计者佐野研二郎均可以参加', 'tokenid': [371, 783, 796, 20, 2970, 249, 668, 2014, 174, 2000, 20, 683, 1305, 281, 1861, 485, 1918, 760, 2042, 671, 47, 48, 1326, 285], 'shape': [24, 4519]}}), ('IC0080W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5178300', 'shape': [844, 40]}, 'output': {'text': '三元桥启动换梁大修将在夜间占用三环进行施工', 'tokenid': [24, 1323, 1293, 1356, 323, 124, 827, 217, 698, 1611, 51, 696, 84, 1143, 261, 24, 402, 1566, 137, 1178, 776], 'shape': [21, 4519]}}), ('IC0080W0463', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5269279', 'shape': [844, 40]}, 'output': {'text': '一艘渔船在长寿区长江川维码头附近触礁翻船', 'tokenid': [66, 2918, 3739, 1028, 51, 176, 1720, 396, 176, 356, 1334, 1843, 1189, 118, 18, 19, 2534, 3182, 1815, 1028], 'shape': [20, 4519]}}), ('IC0085W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2666234', 'shape': [842, 40]}, 'output': {'text': '三门峡泄洪村民冒险捞鱼被困警用直升机紧急救援', 'tokenid': [24, 192, 1528, 3428, 1416, 496, 12, 1296, 629, 3470, 932, 392, 1468, 949, 261, 1041, 403, 544, 1392, 455, 1347, 3814], 'shape': [22, 4519]}}), ('IC0096W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:8936948', 'shape': [841, 40]}, 'output': {'text': '上海小客车总量达三百二十万辆未来两年或增八十万辆', 'tokenid': [54, 448, 7, 1364, 129, 878, 150, 752, 24, 988, 760, 27, 1569, 1604, 1784, 154, 505, 303, 280, 1836, 777, 27, 1569, 1604], 'shape': [24, 4519]}}), ('IC0096W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9105539', 'shape': [841, 40]}, 'output': {'text': '因为前晚杨浦区市场监督管理局的一纸封条', 'tokenid': [377, 200, 931, 73, 337, 2013, 396, 22, 545, 2050, 2051, 897, 666, 1457, 20, 66, 2122, 1716, 1016], 'shape': [19, 4519]}}), ('IC0006W0160', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:1065543', 'shape': [840, 40]}, 'output': {'text': '本公司的房地产开发及进出口贸易业务将被全部剥离', 'tokenid': [105, 172, 929, 20, 1362, 52, 1300, 95, 570, 435, 1566, 56, 944, 2643, 82, 575, 1018, 1611, 392, 11, 1081, 3671, 899], 'shape': [23, 4519]}}), ('IC0080W0453', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5001869', 'shape': [840, 40]}, 'output': {'text': '三人网购他人身份证办信用卡卖四十多张获利万元', 'tokenid': [24, 72, 651, 111, 194, 72, 701, 858, 859, 165, 369, 261, 1007, 397, 366, 27, 147, 474, 1963, 526, 1569, 1323], 'shape': [22, 4519]}}), ('IC0002W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1664231', 'shape': [839, 40]}, 'output': {'text': '也通过大幅度调整对普通住宅总价认定标准的方式', 'tokenid': [229, 178, 420, 217, 3133, 139, 160, 658, 935, 971, 178, 381, 1597, 878, 399, 199, 87, 55, 35, 20, 292, 821], 'shape': [22, 4519]}}), ('IC0096W0325', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6469976', 'shape': [839, 40]}, 'output': {'text': '各地要将充电基础设施配套电网建设与改造项目', 'tokenid': [1115, 52, 99, 1611, 411, 30, 228, 2280, 683, 1178, 1460, 1866, 30, 651, 917, 683, 501, 630, 1855, 2447, 723], 'shape': [21, 4519]}}), ('ID0046W0472', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.45.ark:5820679', 'shape': [839, 40]}, 'output': {'text': '继续实施农业种子种苗种畜种禽免税进口优惠政策', 'tokenid': [2137, 1856, 1207, 1178, 1065, 575, 568, 393, 568, 1540, 568, 3616, 568, 3453, 890, 2064, 1566, 944, 112, 113, 2203, 2730], 'shape': [22, 4519]}}), ('IC0009W0395', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4089674', 'shape': [838, 40]}, 'output': {'text': '肯尼亚队以二金二银二铜在金牌榜和奖牌榜上暂时领跑', 'tokenid': [1534, 1608, 527, 1060, 48, 760, 497, 760, 659, 760, 1059, 51, 497, 1252, 1077, 232, 1173, 1252, 1077, 54, 1499, 83, 909, 1021], 'shape': [24, 4519]}}), ('IC0080W0447', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4862071', 'shape': [838, 40]}, 'output': {'text': '丰台检察院以诈骗罪对胡某路某吴某提起公诉', 'tokenid': [1437, 67, 1568, 2256, 515, 48, 3373, 1498, 506, 935, 814, 71, 327, 71, 331, 71, 239, 404, 172, 260], 'shape': [20, 4519]}}), ('ID0043W0330', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3779769', 'shape': [838, 40]}, 'output': {'text': '五亿九千九百五十二万五千零九十五点五一', 'tokenid': [401, 2721, 419, 1242, 419, 988, 401, 27, 760, 1569, 401, 1242, 783, 419, 27, 401, 74, 401, 66], 'shape': [19, 4519]}}), ('IC0085W0283', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8597211', 'shape': [837, 40]}, 'output': {'text': '该市场在二零三零年之前规模将达到七百亿美元之巨', 'tokenid': [569, 22, 545, 51, 760, 783, 24, 783, 303, 339, 931, 1769, 820, 1611, 752, 75, 748, 988, 2721, 567, 1323, 339, 2541], 'shape': [23, 4519]}}), ('IC0002W0435', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:7209787', 'shape': [836, 40]}, 'output': {'text': '五岁智障儿走失三百天母亲曾放弃寻找悔恨不已', 'tokenid': [401, 796, 1768, 1718, 249, 520, 716, 24, 988, 78, 2078, 615, 1935, 6, 1682, 1070, 388, 2787, 901, 42, 1197], 'shape': [21, 4519]}}), ('IC0080W0356', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3020101', 'shape': [836, 40]}, 'output': {'text': '那就是以三十二的微弱劣势不敌山东鲁能的老将张超', 'tokenid': [245, 220, 60, 48, 24, 27, 760, 20, 719, 846, 2609, 1551, 42, 2286, 523, 359, 1799, 177, 20, 282, 1611, 474, 21], 'shape': [23, 4519]}}), ('ID0023W0477', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:5542934', 'shape': [836, 40]}, 'output': {'text': '正式对外宣布队长惠若琪因身体原因将缺席前半个赛季', 'tokenid': [250, 821, 935, 215, 663, 985, 1060, 176, 113, 584, 1221, 377, 701, 451, 364, 377, 1611, 925, 2028, 931, 1170, 39, 1180, 1383], 'shape': [24, 4519]}}), ('IC0002W0362', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5797738', 'shape': [835, 40]}, 'output': {'text': '有望首先令杭州以及浙江当地的基建上市公司获益', 'tokenid': [107, 352, 207, 564, 2435, 580, 226, 48, 435, 2008, 356, 472, 52, 20, 228, 917, 54, 22, 172, 929, 1963, 2279], 'shape': [22, 4519]}}), ('IC0009W0232', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:976375', 'shape': [835, 40]}, 'output': {'text': '国际社会倾向于用完全不同于美国的眼光来看待中国', 'tokenid': [468, 1166, 1572, 174, 314, 543, 1398, 261, 37, 11, 42, 426, 1398, 567, 468, 20, 256, 452, 154, 121, 197, 347, 468], 'shape': [23, 4519]}}), ('IC0009W0375', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3696974', 'shape': [834, 40]}, 'output': {'text': '每支队伍派二名队员完成约八十米高的速降', 'tokenid': [469, 1168, 1060, 779, 753, 760, 444, 1060, 994, 37, 309, 686, 777, 27, 447, 246, 20, 62, 591], 'shape': [19, 4519]}}), ('ID0041W0488', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7110456', 'shape': [834, 40]}, 'output': {'text': '跑者应该有耐心循环渐进的延长跑步距离', 'tokenid': [1021, 281, 677, 569, 107, 1393, 163, 2469, 402, 1830, 1566, 20, 1158, 176, 1021, 345, 167, 899], 'shape': [18, 4519]}}), ('ID0043W0167', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:1378790', 'shape': [834, 40]}, 'output': {'text': '我想要将AM设到一零六点九兆赫', 'tokenid': [17, 130, 99, 1611, '<unk>', '<unk>', 683, 75, 66, 783, 283, 74, 419, 2955, 2404], 'shape': [15, 4519]}}), ('ID0042W0474', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.41.ark:6176382', 'shape': [832, 40]}, 'output': {'text': '沈某一个月四次进入宗某家行窃重庆晚报讯家中被偷', 'tokenid': [1610, 71, 66, 39, 745, 366, 1204, 1566, 346, 597, 71, 417, 137, 3861, 573, 834, 73, 326, 1992, 417, 347, 392, 1344], 'shape': [23, 4519]}}), ('IC0007W0451', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7066999', 'shape': [831, 40]}, 'output': {'text': '宿迁市宿城公警官破获一冒充军官跨省诈骗团伙', 'tokenid': [1283, 2508, 22, 1283, 480, 172, 949, 1234, 1319, 1963, 66, 1296, 411, 773, 1234, 1659, 514, 3373, 1498, 110, 684], 'shape': [21, 4519]}}), ('ID0040W0499', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.39.ark:6918320', 'shape': [831, 40]}, 'output': {'text': '顾客消费签帐达一定金额可捐款公益金换购雨伞与雨衣', 'tokenid': [822, 1364, 1012, 1005, 1959, 2119, 752, 66, 87, 497, 170, 47, 4094, 104, 172, 2279, 497, 124, 111, 218, 1445, 501, 218, 40], 'shape': [24, 4519]}}), ('IC0080W0434', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4581482', 'shape': [830, 40]}, 'output': {'text': '三亚海滩中秋夜垃圾八十吨台媒十米一个垃圾桶', 'tokenid': [24, 527, 448, 2109, 347, 1424, 696, 1330, 1331, 777, 27, 2853, 67, 2259, 27, 447, 66, 39, 1330, 1331, 1764], 'shape': [21, 4519]}}), ('IC0080W0490', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5823210', 'shape': [830, 40]}, 'output': {'text': '三名九零后酒后殴打出租司机致死逃窜三年被抓获', 'tokenid': [24, 444, 419, 783, 114, 871, 114, 4393, 94, 56, 2125, 929, 544, 1054, 502, 2023, 3997, 24, 303, 392, 1955, 1963], 'shape': [22, 4519]}})]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "('IC0096W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:5912144', 'shape': [1325, 40]}, 'output': {'text': '就是加入了P点对点对等网络传输的更新方式', 'tokenid': [220, 60, 285, 346, 63, '<unk>', 74, 935, 74, 935, 537, 651, 652, 664, 1746, 20, 557, 184, 292, 821], 'shape': [20, 4519]}})('IC0007W0264', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:3152908', 'shape': [927, 40]}, 'output': {'text': 'CX六百采用的也并非是FIREFOX操作系统', 'tokenid': ['<unk>', '<unk>', 283, 988, 2394, 261, 20, 229, 1073, 959, 60, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 536, 547, 1009, 1072], 'shape': [22, 4519]}})\n",
      "-----------------\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('ID0041W0413', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5453061', 'shape': [849, 40]}, 'output': {'text': '中海集运二零一一年全年净污损达七亿元', 'tokenid': [347, 448, 725, 625, 760, 783, 66, 66, 303, 11, 303, 582, 2658, 1431, 752, 748, 2721, 1323], 'shape': [18, 4519]}})[[('IC0096W0217', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4231612', 'shape': [830, 40]}, 'output': {'text': '问这次审计中的新农合资金是由卫生部管理的', 'tokenid': [77, 9, 1204, 2052, 1305, 347, 20, 184, 1065, 45, 1563, 497, 60, 1092, 823, 25, 1081, 897, 666, 20], 'shape': [20, 4519]}}), ('ID0023W0422', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:4555552', 'shape': [830, 40]}, 'output': {'text': '位于冕宁县灵山景区有七名宿营大学生因突发暴雨被困', 'tokenid': [57, 1398, 2973, 1069, 307, 1648, 523, 266, 396, 107, 748, 444, 1283, 1942, 217, 238, 25, 377, 789, 570, 513, 218, 392, 1468], 'shape': [24, 4519]}}), ('IC0002W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2972068', 'shape': [828, 40]}, 'output': {'text': '这种模式不能保证所采购产品是市场上最好的产品', 'tokenid': [9, 568, 820, 821, 42, 177, 628, 859, 434, 2394, 111, 1300, 490, 60, 22, 545, 54, 208, 120, 20, 1300, 490], 'shape': [22, 4519]}}), ('IC0002W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8293437', 'shape': [828, 40]}, 'output': {'text': '备受社会关注的新乡小冀镇五岁男孩王明涵失踪案告破', 'tokenid': [36, 504, 1572, 174, 179, 438, 20, 184, 495, 7, 3150, 819, 401, 796, 995, 866, 595, 308, 1183, 716, 2438, 566, 33, 1319], 'shape': [24, 4519]}}), ('IC0002W0349', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5524709', 'shape': [827, 40]}, 'output': {'text': '虽然杭州将为亚运会投入多少还没有官方声音流出', 'tokenid': [943, 408, 580, 226, 1611, 200, 527, 625, 174, 49, 346, 147, 148, 146, 171, 107, 1234, 292, 182, 152, 149, 56], 'shape': [22, 4519]}}), ('ID0024W0424', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4436084', 'shape': [827, 40]}, 'output': {'text': '贵阳市政府正在为远程医疗纳入医保范围给予政策支持', 'tokenid': [1600, 600, 22, 2203, 1146, 250, 51, 200, 306, 1590, 841, 3167, 2782, 346, 841, 628, 1219, 746, 70, 3422, 2203, 2730, 1168, 1485], 'shape': [24, 4519]}}), ('ID0047W0508', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:6957886', 'shape': [827, 40]}, 'output': {'text': '虽然在奥斯卡影帝角逐中败给了八零后埃迪雷德梅恩', 'tokenid': [943, 408, 51, 316, 1061, 1007, 31, 727, 1927, 3382, 347, 2894, 70, 63, 777, 783, 114, 3351, 734, 467, 621, 835, 342], 'shape': [23, 4519]}}), ('IC0009W0046', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:6749218', 'shape': [826, 40]}, 'output': {'text': '找下YESTERDAYINCEMORE', 'tokenid': [388, 142, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>'], 'shape': [19, 4519]}}), ('ID0026W0390', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.35.ark:3920047', 'shape': [826, 40]}, 'output': {'text': '荆楚网消息记者周三春通讯员文波九月九日上午', 'tokenid': [3269, 1762, 651, 1012, 997, 470, 281, 227, 24, 647, 178, 1992, 994, 772, 1735, 419, 745, 419, 521, 54, 966], 'shape': [21, 4519]}}), ('IC0080W0169', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9036911', 'shape': [825, 40]}, 'output': {'text': '河北省张金龙律师事务所副主任王罡律师认为', 'tokenid': [223, 224, 514, 474, 497, 1062, 320, 578, 363, 1018, 434, 2195, 824, 611, 595, 4222, 320, 578, 199, 200], 'shape': [20, 4519]}}), ('ID0043W0322', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3630545', 'shape': [823, 40]}, 'output': {'text': '八亿七千八百二十六万四千八百四十', 'tokenid': [777, 2721, 748, 1242, 777, 988, 760, 27, 283, 1569, 366, 1242, 777, 988, 366, 27], 'shape': [16, 4519]}}), ('IC0009W0140', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8060360', 'shape': [822, 40]}, 'output': {'text': '从业绩报告的显示评级机构的认定和行业测评的颁奖', 'tokenid': [371, 575, 1581, 326, 33, 20, 862, 948, 1727, 1107, 544, 1594, 20, 199, 87, 232, 137, 575, 2126, 1727, 20, 3370, 1173], 'shape': [23, 4519]}}), ('IC0009W0406', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4385277', 'shape': [821, 40]}, 'output': {'text': '结果因此操作时差而导致阿汤哥面临溺死水中的危险', 'tokenid': [893, 237, 377, 1099, 536, 547, 83, 460, 1044, 85, 1054, 373, 2207, 891, 216, 1157, 3390, 502, 98, 347, 20, 2486, 629], 'shape': [23, 4519]}}), ('IC0085W0275', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8421347', 'shape': [820, 40]}, 'output': {'text': 'ULA所使用的火箭搭载的是俄制RD一百八十发动机', 'tokenid': ['<unk>', '<unk>', '<unk>', 434, 654, 261, 20, 423, 3400, 2268, 258, 20, 60, 1698, 278, '<unk>', '<unk>', 66, 988, 777, 27, 570, 323, 544], 'shape': [24, 4519]}}), ('IC0002W0178', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2349226', 'shape': [819, 40]}, 'output': {'text': '打破过去在外汇管理跨境资金流动方面的多重限制', 'tokenid': [94, 1319, 420, 125, 51, 215, 2018, 897, 666, 1659, 2332, 1563, 497, 149, 323, 292, 216, 20, 147, 573, 550, 278], 'shape': [22, 4519]}}), ('IC0003W0147', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1987536', 'shape': [819, 40]}, 'output': {'text': '似乎是房企目前得到的能最快最直接产生效果的合作', 'tokenid': [882, 1883, 60, 1362, 3202, 723, 931, 471, 75, 20, 177, 208, 214, 208, 1041, 877, 1300, 25, 1074, 237, 20, 45, 547], 'shape': [23, 4519]}}), ('IC0009W0450', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5201089', 'shape': [819, 40]}, 'output': {'text': '六旬农妇建民宿月入十万立约游客刻字罚放羊', 'tokenid': [283, 2840, 1065, 300, 917, 12, 1283, 745, 346, 27, 1569, 1369, 686, 661, 1364, 1407, 445, 2030, 6, 709], 'shape': [20, 4519]}}), ('IC0006W0466', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:6576241', 'shape': [818, 40]}, 'output': {'text': '百分之六十八点七受访者不会以偶像剧主角为择偶标准', 'tokenid': [988, 421, 339, 283, 27, 777, 74, 748, 504, 2386, 281, 42, 174, 48, 1310, 117, 430, 824, 1927, 200, 1681, 1310, 55, 35], 'shape': [24, 4519]}}), ('IC0008W0382', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4627495', 'shape': [818, 40]}, 'output': {'text': '牙买加选手理查兹二十一米六九刷新国家纪录获得季军', 'tokenid': [1335, 616, 285, 687, 241, 666, 80, 2701, 760, 27, 66, 447, 283, 419, 1696, 184, 468, 417, 1694, 290, 1963, 471, 1383, 773], 'shape': [24, 4519]}}), ('IC0080W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4452397', 'shape': [818, 40]}, 'output': {'text': '每晚陪睡价码二十万元台币约三点九万人民币起跳', 'tokenid': [469, 73, 916, 791, 399, 1189, 760, 27, 1569, 1323, 67, 2647, 686, 24, 74, 419, 1569, 72, 12, 2647, 404, 1635], 'shape': [22, 4519]}}), ('ID0024W0421', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4355745', 'shape': [818, 40]}, 'output': {'text': '研究者现在还想要创建更加大型的和小型的四D印制物', 'tokenid': [1918, 1919, 281, 136, 51, 146, 130, 99, 1299, 917, 557, 285, 217, 680, 20, 232, 7, 680, 20, 366, '<unk>', 1500, 278, 132], 'shape': [24, 4519]}}), ('ID0043W0303', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3196718', 'shape': [818, 40]}, 'output': {'text': '三亿零五百七十零九千二百八十七', 'tokenid': [24, 2721, 783, 401, 988, 748, 27, 783, 419, 1242, 760, 988, 777, 27, 748], 'shape': [15, 4519]}}), ('IC0007W0123', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:133415', 'shape': [817, 40]}, 'output': {'text': '新湖中宝继去年入股温州银行抢食金改红利后', 'tokenid': [184, 464, 347, 778, 2137, 125, 303, 346, 950, 161, 226, 659, 137, 2037, 1557, 497, 630, 457, 526, 114], 'shape': [20, 4519]}}), ('IC0007W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6121728', 'shape': [817, 40]}, 'output': {'text': '今日发布了长达二分四十一秒的国剧版长全长预告', 'tokenid': [267, 521, 570, 985, 63, 176, 752, 760, 421, 366, 27, 66, 1613, 20, 468, 430, 483, 176, 11, 176, 32, 33], 'shape': [22, 4519]}}), ('IC0083W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:3833647', 'shape': [817, 40]}, 'output': {'text': '三名醉酒男子持刀捅伤一名出租车司机后骑摩托车逃跑', 'tokenid': [24, 444, 532, 871, 995, 393, 1485, 1102, 3525, 209, 66, 444, 56, 2125, 129, 929, 544, 114, 1267, 1999, 1083, 129, 2023, 1021], 'shape': [24, 4519]}}), ('IC0096W0228', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4463655', 'shape': [817, 40]}, 'output': {'text': '农村居民因病致贫因病返贫的状况得到很大缓解', 'tokenid': [1065, 496, 1263, 12, 377, 843, 1054, 3155, 377, 843, 1537, 3155, 20, 1776, 473, 471, 75, 244, 217, 1029, 1365], 'shape': [21, 4519]}}), ('IC0007W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7187124', 'shape': [816, 40]}, 'output': {'text': '六分钟吃掉一点五斤凉菜大妈获一千六百六十六元奖励', 'tokenid': [283, 421, 1014, 853, 1337, 66, 74, 401, 2589, 1162, 1353, 217, 939, 1963, 66, 1242, 283, 988, 283, 27, 283, 1323, 1173, 1938], 'shape': [24, 4519]}}), ('IC0008W0165', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:645094', 'shape': [816, 40]}, 'output': {'text': '则是宝龙在二零一四动荡之年成功逆势稳企企稳的重要举措', 'tokenid': [2134, 60, 778, 1062, 51, 760, 783, 66, 366, 323, 1827, 339, 303, 309, 1100, 1176, 1551, 1737, 3202, 3202, 1737, 20, 573, 99, 453, 2592], 'shape': [26, 4519]}}), ('IC0003W0436', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7942673', 'shape': [815, 40]}, 'output': {'text': '五岁男童遭患癌继母连砍三十多刀经抢救已脱险', 'tokenid': [401, 796, 995, 1068, 2600, 2977, 3647, 2137, 2078, 656, 2077, 24, 27, 147, 1102, 367, 2037, 1347, 1197, 97, 629], 'shape': [21, 4519]}}), ('IC0009W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5596130', 'shape': [815, 40]}, 'output': {'text': '六旬翁致已婚女怀孕生子女子瞒丈夫十四年被发现', 'tokenid': [283, 2840, 3281, 1054, 1197, 838, 766, 1257, 3070, 25, 393, 766, 393, 1862, 3854, 1066, 27, 366, 303, 392, 570, 136], 'shape': [22, 4519]}}), ('IC0085W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2998969', 'shape': [815, 40]}, 'output': {'text': '上半年中国农民工数量增百分之一年轻农民工缺口明显', 'tokenid': [54, 1170, 303, 347, 468, 1065, 12, 776, 1131, 150, 1836, 988, 421, 339, 66, 303, 443, 1065, 12, 776, 925, 944, 308, 862], 'shape': [24, 4519]}}), ('IC0097W0017', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:279411', 'shape': [815, 40]}, 'output': {'text': '西单商场天通苑购物中心天通西苑社区卫生服务站东', 'tokenid': [873, 159, 1510, 545, 78, 178, 1570, 111, 132, 347, 163, 78, 178, 873, 1570, 1572, 396, 823, 25, 41, 1018, 774, 359], 'shape': [23, 4519]}})]]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('ID0041W0505', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7505017', 'shape': [815, 40]}, 'output': {'text': '江苏重复地名让人傻傻分不清三个周庄两个茅山', 'tokenid': [356, 374, 573, 277, 52, 444, 385, 72, 1327, 1327, 421, 42, 768, 24, 39, 227, 450, 505, 39, 3452, 523], 'shape': [21, 4519]}}), ('IC0086W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:1905953', 'shape': [814, 40]}, 'output': {'text': '上校阅兵后获批率直升机绕飞仙桃市向家乡致意', 'tokenid': [54, 516, 1360, 586, 114, 1963, 1943, 1593, 1041, 403, 544, 2440, 406, 1781, 28, 22, 543, 417, 495, 1054, 439], 'shape': [21, 4519]}}), ('IC0008W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:3329467', 'shape': [813, 40]}, 'output': {'text': '并于一零一二年推出FACE加加人脸识别云平台', 'tokenid': [1073, 1398, 66, 783, 66, 760, 303, 64, 56, '<unk>', '<unk>', '<unk>', '<unk>', 285, 285, 72, 1010, 1450, 279, 487, 1223, 67], 'shape': [22, 4519]}}), ('IC0096W0124', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2318783', 'shape': [813, 40]}, 'output': {'text': '如近期苏州南京出现了溢价率超过百分之七十五的地块', 'tokenid': [236, 19, 196, 374, 226, 135, 360, 56, 136, 63, 1989, 399, 1593, 21, 420, 988, 421, 339, 748, 27, 401, 20, 52, 907], 'shape': [24, 4519]}}), ('IC0096W0493', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9927800', 'shape': [813, 40]}, 'output': {'text': '新教师和师范生代表在尊师重教纪念碑前宣誓静安公园', 'tokenid': [184, 325, 578, 232, 578, 1219, 25, 839, 1116, 51, 2242, 578, 573, 325, 1694, 1120, 945, 931, 663, 2772, 828, 720, 172, 1301], 'shape': [24, 4519]}}), ('IC0008W0457', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:5454681', 'shape': [812, 40]}, 'output': {'text': '水管里流出来的水冲洗着一辆悬挂广东号牌的丰田汽轿车', 'tokenid': [98, 897, 10, 149, 56, 154, 20, 98, 1576, 119, 341, 66, 1604, 2629, 1881, 365, 359, 860, 1252, 20, 1437, 484, 1306, 1969, 129], 'shape': [25, 4519]}}), ('IC0009W0133', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7928969', 'shape': [812, 40]}, 'output': {'text': '宝龙地产的收入较二零一三年增加约百分之三十三点一', 'tokenid': [778, 1062, 52, 1300, 20, 131, 346, 581, 760, 783, 66, 24, 303, 1836, 285, 686, 988, 421, 339, 24, 27, 24, 74, 66], 'shape': [24, 4519]}}), ('ID0044W0517', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.43.ark:7023639', 'shape': [812, 40]}, 'output': {'text': '联建光电LED显示应用产品实现销售收入三点七亿元', 'tokenid': [1008, 917, 452, 30, '<unk>', '<unk>', '<unk>', 862, 948, 677, 261, 1300, 490, 1207, 136, 977, 978, 131, 346, 24, 74, 748, 2721, 1323], 'shape': [24, 4519]}}), ('ID0041W0406', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5291230', 'shape': [811, 40]}, 'output': {'text': '杭州上海和广州电视商务服务指数分列前三位', 'tokenid': [580, 226, 54, 448, 232, 365, 226, 30, 93, 1510, 1018, 41, 1018, 134, 1131, 421, 429, 931, 24, 57], 'shape': [20, 4519]}}), ('IC0003W0478', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8898499', 'shape': [810, 40]}, 'output': {'text': '持刀和铁棍挟持传销头目后引起市民围观成功报警', 'tokenid': [1485, 1102, 232, 1261, 2277, 4490, 1485, 664, 977, 118, 723, 114, 2650, 404, 22, 12, 746, 807, 309, 1100, 326, 949], 'shape': [22, 4519]}}), ('IC0085W0330', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:673701', 'shape': [810, 40]}, 'output': {'text': '中信银行副行长郭党怀在银行业例行新闻发布会透露', 'tokenid': [347, 369, 659, 137, 2195, 137, 176, 1126, 2201, 1257, 51, 659, 137, 575, 454, 137, 184, 185, 570, 985, 174, 1554, 1491], 'shape': [23, 4519]}}), ('IC0080W0202', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9823240', 'shape': [809, 40]}, 'output': {'text': '严格政府水利投融资企业的年度审计制度', 'tokenid': [1313, 400, 2203, 1146, 98, 526, 49, 2478, 1563, 3202, 575, 20, 303, 139, 2052, 1305, 278, 139], 'shape': [18, 4519]}}), ('IC0096W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9763416', 'shape': [809, 40]}, 'output': {'text': '上海已能对十六种癌基因筛查几率较高将可干预', 'tokenid': [54, 448, 1197, 177, 935, 27, 283, 568, 3647, 228, 377, 4187, 80, 296, 1593, 581, 246, 1611, 47, 175, 32], 'shape': [21, 4519]}}), ('IC0080W0304', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:1884225', 'shape': [808, 40]}, 'output': {'text': '机器换人引发了是否会增加工厂裁员员工失业的疑虑', 'tokenid': [544, 1001, 124, 72, 2650, 570, 63, 60, 61, 174, 1836, 285, 776, 2169, 2869, 994, 994, 776, 716, 575, 20, 1258, 2545], 'shape': [23, 4519]}}), ('ID0043W0516', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:6961787', 'shape': [808, 40]}, 'output': {'text': '早在二零零五年的南宁全国青年竞走锦标赛上', 'tokenid': [418, 51, 760, 783, 783, 401, 303, 20, 135, 1069, 11, 468, 486, 303, 2725, 520, 1298, 55, 1180, 54], 'shape': [20, 4519]}}), ('IC0002W0240', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:3533192', 'shape': [807, 40]}, 'output': {'text': '现在需要对央企现有的经营产品链条进行重新思考', 'tokenid': [136, 51, 212, 99, 935, 809, 3202, 136, 107, 20, 367, 1942, 1300, 490, 1186, 1016, 1566, 137, 573, 184, 693, 1312], 'shape': [22, 4519]}}), ('IC0095W0459', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:8490919', 'shape': [807, 40]}, 'output': {'text': '新民网记者在现场采访了该商场工程物业的王姓副总', 'tokenid': [184, 12, 651, 470, 281, 51, 136, 545, 2394, 2386, 63, 569, 1510, 545, 776, 1590, 132, 575, 20, 595, 2615, 2195, 878], 'shape': [23, 4519]}}), ('IC0006W0477', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:6772771', 'shape': [806, 40]}, 'output': {'text': '她杀人的秘密直至上周背著尸体的画面曝光后才被曝光', 'tokenid': [886, 531, 72, 20, 981, 982, 1041, 668, 54, 227, 265, 2732, 1911, 451, 20, 1442, 216, 3674, 452, 114, 673, 392, 3674, 452], 'shape': [24, 4519]}}), ('IC0080W0218', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:272187', 'shape': [806, 40]}, 'output': {'text': '尤其是明确了见义勇为死亡人员抚恤补助政策', 'tokenid': [1373, 1144, 60, 308, 1325, 63, 193, 1399, 1475, 200, 502, 2024, 72, 994, 2233, 3817, 2432, 133, 2203, 2730], 'shape': [20, 4519]}}), ('IC0096W0455', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9072946', 'shape': [806, 40]}, 'output': {'text': '上海小龙虾盖浇饭店回应中毒事件对手恶意诽谤', 'tokenid': [54, 448, 7, 1062, 1473, 1161, 1858, 852, 106, 511, 677, 347, 2365, 363, 1105, 935, 241, 712, 439, 2444, '<unk>'], 'shape': [21, 4519]}}), ('IC0083W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:3333608', 'shape': [805, 40]}, 'output': {'text': '三男子租厂房装修三层办公楼诈骗九十八家企业七百三十万', 'tokenid': [24, 995, 393, 2125, 2169, 1362, 942, 698, 24, 1307, 165, 172, 1294, 3373, 1498, 419, 27, 777, 417, 3202, 575, 748, 988, 24, 27, 1569], 'shape': [26, 4519]}}), ('IC0085W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:3322671', 'shape': [805, 40]}, 'output': {'text': '该办事处安监办副主任陈武被指酒后无故拉响消防警报', 'tokenid': [569, 165, 363, 665, 720, 2050, 165, 2195, 824, 611, 186, 726, 392, 134, 871, 114, 509, 362, 188, 153, 1012, 955, 949, 326], 'shape': [24, 4519]}}), ('IC0096W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2475861', 'shape': [804, 40]}, 'output': {'text': '而剩余极少数纯商住宅部分价格被推升至畸高', 'tokenid': [1044, 1004, 169, 1089, 148, 1131, 609, 1510, 381, 1597, 1081, 421, 399, 400, 392, 64, 403, 668, '<unk>', 246], 'shape': [20, 4519]}}), ('IC0096W0169', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:3241668', 'shape': [804, 40]}, 'output': {'text': '国家统计局城市司高级统计师刘建伟进行了解读', 'tokenid': [468, 417, 1072, 1305, 1457, 480, 22, 929, 246, 1107, 1072, 1305, 578, 583, 917, 1230, 1566, 137, 63, 1365, 1199], 'shape': [21, 4519]}}), ('IC0096W0331', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6615494', 'shape': [804, 40]}, 'output': {'text': '引导社会资本参与充电基础设施体系设运营', 'tokenid': [2650, 85, 1572, 174, 1563, 105, 1326, 501, 411, 30, 228, 2280, 683, 1178, 451, 1009, 683, 625, 1942], 'shape': [19, 4519]}}), ('IC0009W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7856710', 'shape': [803, 40]}, 'output': {'text': '宝龙美术馆已与宝龙华韵宝龙拍卖书藏楼形成联动', 'tokenid': [778, 1062, 567, 1032, 762, 1197, 501, 778, 1062, 562, 357, 778, 1062, 1848, 397, 697, 804, 1294, 1145, 309, 1008, 323], 'shape': [22, 4519]}}), ('IC0003W0192', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:2969261', 'shape': [801, 40]}, 'output': {'text': '今年北京四川河南等省市率先公布了三公经费', 'tokenid': [267, 303, 224, 360, 366, 1334, 223, 135, 537, 514, 22, 1593, 564, 172, 985, 63, 24, 172, 367, 1005], 'shape': [20, 4519]}}), ('IC0007W0177', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:1312637', 'shape': [801, 40]}, 'output': {'text': '北京深圳由去年年中的二十个月降至年底的十二个月', 'tokenid': [224, 360, 695, 1193, 1092, 125, 303, 303, 347, 20, 760, 27, 39, 745, 591, 668, 303, 1195, 20, 27, 760, 39, 745], 'shape': [23, 4519]}}), ('IC0084W0170', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:7087869', 'shape': [801, 40]}, 'output': {'text': '其监测十大重点城市四十四个老推新项目中有一半出现价格上涨', 'tokenid': [1144, 2050, 2126, 27, 217, 573, 74, 480, 22, 366, 27, 366, 39, 282, 64, 184, 2447, 723, 347, 107, 66, 1170, 56, 136, 399, 400, 54, 3385], 'shape': [28, 4519]}}), ('IC0097W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:4822197', 'shape': [801, 40]}, 'output': {'text': '外媒BUSINESSINSIDER认为', 'tokenid': [215, 2259, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 199, 200], 'shape': [19, 4519]}}), ('ID0041W0515', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7702267', 'shape': [801, 40]}, 'output': {'text': '而作为软硬件提供者的英特尔更是深谙此道', 'tokenid': [1044, 547, 200, 1577, 1726, 1105, 239, 2214, 281, 20, 493, 336, 529, 557, 60, 695, 3020, 1099, 44], 'shape': [19, 4519]}}), ('IC0003W0278', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:4673899', 'shape': [800, 40]}, 'output': {'text': 'CES的所有利润均再投资到CEA的产业服务中', 'tokenid': ['<unk>', '<unk>', '<unk>', 20, 434, 107, 526, 2166, 671, 386, 49, 1563, 75, '<unk>', '<unk>', '<unk>', 20, 1300, 575, 41, 1018, 347], 'shape': [22, 4519]}})]]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[('IC0085W0214', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:7183574', 'shape': [879, 40]}, 'output': {'text': '某保险公司权益投资部总经理曾某借职务之便建老鼠仓', 'tokenid': [71, 628, 629, 172, 929, 1480, 2279, 49, 1563, 1081, 878, 367, 666, 1935, 71, 714, 2124, 1018, 339, 293, 917, 282, 1441, 2852], 'shape': [24, 4519]}}), ('IC0009W0360', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3432159', 'shape': [878, 40]}, 'output': {'text': '首先由带有浓浓山海关特色的项目一古城定向开场', 'tokenid': [207, 564, 1092, 494, 107, 2702, 2702, 523, 448, 179, 336, 724, 20, 2447, 723, 66, 1128, 480, 87, 543, 95, 545], 'shape': [22, 4519]}}), ('IC0007W0129', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:297573', 'shape': [877, 40]}, 'output': {'text': '持股比例直逼宁波银行第一大股东新加坡华侨银行', 'tokenid': [1485, 950, 268, 454, 1041, 2357, 1069, 1735, 659, 137, 254, 66, 217, 950, 359, 184, 285, 1853, 562, 2989, 659, 137], 'shape': [22, 4519]}}), ('ID0020W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.29.ark:4167261', 'shape': [877, 40]}, 'output': {'text': '浙江杀人嫌犯潜逃一七年持兄长身份证乘地铁被抓', 'tokenid': [2008, 356, 531, 72, 2129, 2270, 2474, 2023, 66, 748, 303, 1485, 589, 176, 701, 858, 859, 1771, 52, 1261, 392, 1955], 'shape': [22, 4519]}}), ('ID0043W0520', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:7043799', 'shape': [877, 40]}, 'output': {'text': '严禁骗取套取中央和省级财政城市棚户区改造专项资金', 'tokenid': [1313, 1386, 1498, 1011, 1866, 1011, 347, 809, 232, 514, 1107, 1715, 2203, 480, 22, 1171, 168, 396, 630, 1855, 204, 2447, 1563, 497], 'shape': [24, 4519]}}), ('IC0080W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5212413', 'shape': [875, 40]}, 'output': {'text': '三兄弟驾渔船长江翻船老大漂流十余公里获救', 'tokenid': [24, 589, 590, 1772, 3739, 1028, 176, 356, 1815, 1028, 282, 217, 412, 149, 27, 169, 172, 10, 1963, 1347], 'shape': [20, 4519]}}), ('IC0009W0126', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7748858', 'shape': [873, 40]}, 'output': {'text': '以及类酒店产品宝龙客栈及宝龙少海房车露营地', 'tokenid': [48, 435, 340, 871, 106, 1300, 490, 778, 1062, 1364, 3147, 435, 778, 1062, 148, 448, 1362, 129, 1491, 1942, 52], 'shape': [21, 4519]}}), ('IC0007W0454', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7133698', 'shape': [872, 40]}, 'output': {'text': '六内地乘客打伤七香港机场的勤续四人被判处监禁', 'tokenid': [283, 519, 52, 1771, 1364, 94, 209, 748, 1113, 1135, 544, 545, 20, 1754, 1856, 366, 72, 392, 1786, 665, 2050, 1386], 'shape': [22, 4519]}}), ('IC0080W0127', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:8159405', 'shape': [872, 40]}, 'output': {'text': '野三坡风景名胜区范围为为东北方向与北京市相邻', 'tokenid': [485, 24, 1853, 145, 266, 444, 1148, 396, 1219, 746, 200, 200, 359, 224, 292, 543, 501, 224, 360, 22, 831, 3331], 'shape': [22, 4519]}}), ('IC0096W0374', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7466033', 'shape': [872, 40]}, 'output': {'text': '东道主福州队的徐婷婷摘得女子飞碟双向桂冠', 'tokenid': [359, 44, 824, 642, 226, 1060, 20, 856, 1870, 1870, 3220, 471, 766, 393, 406, 2400, 255, 543, 1761, 1462], 'shape': [20, 4519]}}), ('IC0007W0128', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:262380', 'shape': [871, 40]}, 'output': {'text': '雅戈尔持有宁波银行总股份达三点五一亿股', 'tokenid': [498, 2812, 529, 1485, 107, 1069, 1735, 659, 137, 878, 950, 858, 752, 24, 74, 401, 66, 2721, 950], 'shape': [19, 4519]}}), ('ID0043W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5277342', 'shape': [868, 40]}, 'output': {'text': '而张继科则是拿下了生涯第六个全锦赛男单奖牌', 'tokenid': [1044, 474, 2137, 1523, 2134, 60, 933, 142, 63, 25, 2319, 254, 283, 39, 11, 1298, 1180, 995, 159, 1173, 1252], 'shape': [21, 4519]}}), ('IC0080W0432', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4527736', 'shape': [867, 40]}, 'output': {'text': '三亚景区售玳瑁三万元两只二月份曾被立案调查', 'tokenid': [24, 527, 266, 396, 978, '<unk>', '<unk>', 24, 1569, 1323, 505, 96, 760, 745, 858, 1935, 392, 1369, 566, 160, 80], 'shape': [21, 4519]}}), ('IC0007W0168', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:1098660', 'shape': [866, 40]}, 'output': {'text': '信贷和公积金政策的逐渐宽松对市场而言是较大的利好', 'tokenid': [369, 2975, 232, 172, 1621, 497, 2203, 2730, 20, 3382, 1830, 1790, 354, 935, 22, 545, 1044, 4, 60, 581, 217, 20, 526, 120], 'shape': [24, 4519]}}), ('IC0080W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4096163', 'shape': [865, 40]}, 'output': {'text': '中国新闻网七月二十三日报道据台湾东森新闻消息', 'tokenid': [347, 468, 184, 185, 651, 748, 745, 760, 27, 24, 521, 326, 44, 1775, 67, 1917, 359, 1505, 184, 185, 1012, 997], 'shape': [22, 4519]}}), ('IC0007W0481', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7810269', 'shape': [863, 40]}, 'output': {'text': '六岁女童村中被扎身亡货车扎头部后仍继续行驶', 'tokenid': [283, 796, 766, 1068, 496, 347, 392, 2387, 701, 2024, 996, 129, 2387, 118, 1081, 114, 3031, 2137, 1856, 137, 138], 'shape': [21, 4519]}}), ('IC0080W0385', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3581018', 'shape': [863, 40]}, 'output': {'text': '难遇天才竟急速沉沦张继科在负面新闻中渐行渐远', 'tokenid': [937, 699, 78, 673, 2155, 455, 62, 1797, 2855, 474, 2137, 1523, 51, 731, 216, 184, 185, 347, 1830, 137, 1830, 306], 'shape': [22, 4519]}}), ('IC0003W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1890364', 'shape': [858, 40]}, 'output': {'text': '怎么完成传统房企与互联网融合落地都是其研究的大事', 'tokenid': [101, 109, 37, 309, 664, 1072, 1362, 3202, 501, 2806, 1008, 651, 2478, 45, 717, 52, 173, 60, 1144, 1918, 1919, 20, 217, 363], 'shape': [24, 4519]}}), ('IC0080W0443', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4757299', 'shape': [858, 40]}, 'output': {'text': '三亚餐厅扇贝十五元一份变十五元一个续同意退款', 'tokenid': [24, 527, 884, 1542, 2789, 863, 27, 401, 1323, 66, 858, 730, 27, 401, 1323, 66, 39, 1856, 426, 439, 627, 104], 'shape': [22, 4519]}}), ('ID0047W0379', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4383869', 'shape': [858, 40]}, 'output': {'text': '索玛基金会理事长黄红斌被当地森林公安带走', 'tokenid': [16, 2017, 228, 497, 174, 666, 363, 176, 489, 457, 816, 392, 472, 52, 1505, 593, 172, 720, 494, 520], 'shape': [20, 4519]}}), ('IC0080W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4918257', 'shape': [857, 40]}, 'output': {'text': '华商报咸阳讯记者张林三小伙伴相约出去玩水', 'tokenid': [562, 1510, 326, 3364, 600, 1992, 470, 281, 474, 593, 24, 7, 684, 409, 831, 686, 56, 125, 221, 98], 'shape': [20, 4519]}}), ('IC0087W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:5112895', 'shape': [855, 40]}, 'output': {'text': '国家统计局今日发布七十大中城市房价变动情况', 'tokenid': [468, 417, 1072, 1305, 1457, 267, 521, 570, 985, 748, 27, 217, 347, 480, 22, 1362, 399, 730, 323, 164, 473], 'shape': [21, 4519]}}), ('IC0097W0163', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:2539429', 'shape': [855, 40]}, 'output': {'text': '二零一五年上半年又经历了三次降息两次降准调整力度堪比二零零八年', 'tokenid': [760, 783, 66, 401, 303, 54, 1170, 303, 324, 367, 1285, 63, 24, 1204, 591, 997, 505, 1204, 591, 35, 160, 658, 270, 139, 3053, 268, 760, 783, 783, 777, 303], 'shape': [31, 4519]}}), ('IC0096W0479', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9619418', 'shape': [854, 40]}, 'output': {'text': '上海崇明通报村官腐案虚开发票套取公款等', 'tokenid': [54, 448, 1960, 308, 178, 326, 496, 1234, 1595, 566, 2475, 95, 570, 626, 1866, 1011, 172, 104, 537], 'shape': [19, 4519]}}), ('ID0041W0399', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5125879', 'shape': [854, 40]}, 'output': {'text': '现在微传科技也积极在海外对自己的技术进行保护', 'tokenid': [136, 51, 719, 664, 1523, 1555, 229, 1621, 1089, 51, 448, 215, 935, 275, 276, 20, 1555, 1032, 1566, 137, 628, 1670], 'shape': [22, 4519]}}), ('IC0003W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8134883', 'shape': [853, 40]}, 'output': {'text': '孩子的父亲在手术室外痛哭昨日下午二时三十分左右', 'tokenid': [866, 393, 20, 614, 615, 51, 241, 1032, 782, 215, 634, 1447, 605, 521, 142, 966, 760, 83, 24, 27, 421, 1824, 2063], 'shape': [23, 4519]}}), ('IC0003W0480', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8949725', 'shape': [853, 40]}, 'output': {'text': '五男孩去玩水一人溺亡小伙伴将其物品扔掉隐瞒', 'tokenid': [401, 995, 866, 125, 221, 98, 66, 72, 3390, 2024, 7, 684, 409, 1611, 1144, 132, 490, 2880, 1337, 2196, 1862], 'shape': [21, 4519]}}), ('IC0002W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1716617', 'shape': [852, 40]}, 'output': {'text': '而另外一剂楼市强心剂则当属央行的不对称降息', 'tokenid': [1044, 2092, 215, 66, 3040, 1294, 22, 844, 163, 3040, 2134, 472, 620, 809, 137, 20, 42, 935, 938, 591, 997], 'shape': [21, 4519]}}), ('IC0003W0482', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9006551', 'shape': [852, 40]}, 'output': {'text': '周至警方初步排除他杀可能一当事孩子称', 'tokenid': [227, 668, 949, 292, 1795, 345, 144, 587, 194, 531, 47, 177, 66, 472, 363, 866, 393, 938], 'shape': [18, 4519]}}), ('IC0007W0253', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2915505', 'shape': [851, 40]}, 'output': {'text': '透明窗式显示屏交互式虚拟镜子以及拍照手机等', 'tokenid': [1554, 308, 770, 821, 862, 948, 1612, 81, 2806, 821, 2475, 3479, 1385, 393, 48, 435, 1848, 1259, 241, 544, 537], 'shape': [21, 4519]}}), ('IC0084W0154', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:6790101', 'shape': [850, 40]}, 'output': {'text': '五十四个城市住宅签约总量达十九万四千九百零九套', 'tokenid': [401, 27, 366, 39, 480, 22, 381, 1597, 1959, 686, 878, 150, 752, 27, 419, 1569, 366, 1242, 419, 988, 783, 419, 1866], 'shape': [23, 4519]}}), ('IC0009W0452', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5256635', 'shape': [849, 40]}, 'output': {'text': '六旬大妈伪造签证多次赴美涉偷越国境被判管制一年', 'tokenid': [283, 2840, 217, 939, 1515, 1855, 1959, 859, 147, 1204, 3497, 567, 1743, 1344, 446, 468, 2332, 392, 1786, 897, 278, 66, 303], 'shape': [23, 4519]}})]]-----------------"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------\n",
      "-----------------\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0096W0217', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4231612', 'shape': [830, 40]}, 'output': {'text': '问这次审计中的新农合资金是由卫生部管理的', 'tokenid': [77, 9, 1204, 2052, 1305, 347, 20, 184, 1065, 45, 1563, 497, 60, 1092, 823, 25, 1081, 897, 666, 20], 'shape': [20, 4519]}}), ('ID0023W0422', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.32.ark:4555552', 'shape': [830, 40]}, 'output': {'text': '位于冕宁县灵山景区有七名宿营大学生因突发暴雨被困', 'tokenid': [57, 1398, 2973, 1069, 307, 1648, 523, 266, 396, 107, 748, 444, 1283, 1942, 217, 238, 25, 377, 789, 570, 513, 218, 392, 1468], 'shape': [24, 4519]}}), ('IC0002W0212', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2972068', 'shape': [828, 40]}, 'output': {'text': '这种模式不能保证所采购产品是市场上最好的产品', 'tokenid': [9, 568, 820, 821, 42, 177, 628, 859, 434, 2394, 111, 1300, 490, 60, 22, 545, 54, 208, 120, 20, 1300, 490], 'shape': [22, 4519]}}), ('IC0002W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:8293437', 'shape': [828, 40]}, 'output': {'text': '备受社会关注的新乡小冀镇五岁男孩王明涵失踪案告破', 'tokenid': [36, 504, 1572, 174, 179, 438, 20, 184, 495, 7, 3150, 819, 401, 796, 995, 866, 595, 308, 1183, 716, 2438, 566, 33, 1319], 'shape': [24, 4519]}}), ('IC0002W0349', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:5524709', 'shape': [827, 40]}, 'output': {'text': '虽然杭州将为亚运会投入多少还没有官方声音流出', 'tokenid': [943, 408, 580, 226, 1611, 200, 527, 625, 174, 49, 346, 147, 148, 146, 171, 107, 1234, 292, 182, 152, 149, 56], 'shape': [22, 4519]}}), ('ID0024W0424', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4436084', 'shape': [827, 40]}, 'output': {'text': '贵阳市政府正在为远程医疗纳入医保范围给予政策支持', 'tokenid': [1600, 600, 22, 2203, 1146, 250, 51, 200, 306, 1590, 841, 3167, 2782, 346, 841, 628, 1219, 746, 70, 3422, 2203, 2730, 1168, 1485], 'shape': [24, 4519]}}), ('ID0047W0508', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:6957886', 'shape': [827, 40]}, 'output': {'text': '虽然在奥斯卡影帝角逐中败给了八零后埃迪雷德梅恩', 'tokenid': [943, 408, 51, 316, 1061, 1007, 31, 727, 1927, 3382, 347, 2894, 70, 63, 777, 783, 114, 3351, 734, 467, 621, 835, 342], 'shape': [23, 4519]}}), ('IC0009W0046', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:6749218', 'shape': [826, 40]}, 'output': {'text': '找下YESTERDAYINCEMORE', 'tokenid': [388, 142, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>'], 'shape': [19, 4519]}}), ('ID0026W0390', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.35.ark:3920047', 'shape': [826, 40]}, 'output': {'text': '荆楚网消息记者周三春通讯员文波九月九日上午', 'tokenid': [3269, 1762, 651, 1012, 997, 470, 281, 227, 24, 647, 178, 1992, 994, 772, 1735, 419, 745, 419, 521, 54, 966], 'shape': [21, 4519]}}), ('IC0080W0169', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9036911', 'shape': [825, 40]}, 'output': {'text': '河北省张金龙律师事务所副主任王罡律师认为', 'tokenid': [223, 224, 514, 474, 497, 1062, 320, 578, 363, 1018, 434, 2195, 824, 611, 595, 4222, 320, 578, 199, 200], 'shape': [20, 4519]}}), ('ID0043W0322', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3630545', 'shape': [823, 40]}, 'output': {'text': '八亿七千八百二十六万四千八百四十', 'tokenid': [777, 2721, 748, 1242, 777, 988, 760, 27, 283, 1569, 366, 1242, 777, 988, 366, 27], 'shape': [16, 4519]}}), ('IC0009W0140', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:8060360', 'shape': [822, 40]}, 'output': {'text': '从业绩报告的显示评级机构的认定和行业测评的颁奖', 'tokenid': [371, 575, 1581, 326, 33, 20, 862, 948, 1727, 1107, 544, 1594, 20, 199, 87, 232, 137, 575, 2126, 1727, 20, 3370, 1173], 'shape': [23, 4519]}}), ('IC0009W0406', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:4385277', 'shape': [821, 40]}, 'output': {'text': '结果因此操作时差而导致阿汤哥面临溺死水中的危险', 'tokenid': [893, 237, 377, 1099, 536, 547, 83, 460, 1044, 85, 1054, 373, 2207, 891, 216, 1157, 3390, 502, 98, 347, 20, 2486, 629], 'shape': [23, 4519]}}), ('IC0085W0275', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:8421347', 'shape': [820, 40]}, 'output': {'text': 'ULA所使用的火箭搭载的是俄制RD一百八十发动机', 'tokenid': ['<unk>', '<unk>', '<unk>', 434, 654, 261, 20, 423, 3400, 2268, 258, 20, 60, 1698, 278, '<unk>', '<unk>', 66, 988, 777, 27, 570, 323, 544], 'shape': [24, 4519]}}), ('IC0002W0178', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:2349226', 'shape': [819, 40]}, 'output': {'text': '打破过去在外汇管理跨境资金流动方面的多重限制', 'tokenid': [94, 1319, 420, 125, 51, 215, 2018, 897, 666, 1659, 2332, 1563, 497, 149, 323, 292, 216, 20, 147, 573, 550, 278], 'shape': [22, 4519]}}), ('IC0003W0147', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1987536', 'shape': [819, 40]}, 'output': {'text': '似乎是房企目前得到的能最快最直接产生效果的合作', 'tokenid': [882, 1883, 60, 1362, 3202, 723, 931, 471, 75, 20, 177, 208, 214, 208, 1041, 877, 1300, 25, 1074, 237, 20, 45, 547], 'shape': [23, 4519]}}), ('IC0009W0450', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5201089', 'shape': [819, 40]}, 'output': {'text': '六旬农妇建民宿月入十万立约游客刻字罚放羊', 'tokenid': [283, 2840, 1065, 300, 917, 12, 1283, 745, 346, 27, 1569, 1369, 686, 661, 1364, 1407, 445, 2030, 6, 709], 'shape': [20, 4519]}}), ('IC0006W0466', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:6576241', 'shape': [818, 40]}, 'output': {'text': '百分之六十八点七受访者不会以偶像剧主角为择偶标准', 'tokenid': [988, 421, 339, 283, 27, 777, 74, 748, 504, 2386, 281, 42, 174, 48, 1310, 117, 430, 824, 1927, 200, 1681, 1310, 55, 35], 'shape': [24, 4519]}}), ('IC0008W0382', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:4627495', 'shape': [818, 40]}, 'output': {'text': '牙买加选手理查兹二十一米六九刷新国家纪录获得季军', 'tokenid': [1335, 616, 285, 687, 241, 666, 80, 2701, 760, 27, 66, 447, 283, 419, 1696, 184, 468, 417, 1694, 290, 1963, 471, 1383, 773], 'shape': [24, 4519]}}), ('IC0080W0429', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4452397', 'shape': [818, 40]}, 'output': {'text': '每晚陪睡价码二十万元台币约三点九万人民币起跳', 'tokenid': [469, 73, 916, 791, 399, 1189, 760, 27, 1569, 1323, 67, 2647, 686, 24, 74, 419, 1569, 72, 12, 2647, 404, 1635], 'shape': [22, 4519]}}), ('ID0024W0421', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.33.ark:4355745', 'shape': [818, 40]}, 'output': {'text': '研究者现在还想要创建更加大型的和小型的四D印制物', 'tokenid': [1918, 1919, 281, 136, 51, 146, 130, 99, 1299, 917, 557, 285, 217, 680, 20, 232, 7, 680, 20, 366, '<unk>', 1500, 278, 132], 'shape': [24, 4519]}}), ('ID0043W0303', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:3196718', 'shape': [818, 40]}, 'output': {'text': '三亿零五百七十零九千二百八十七', 'tokenid': [24, 2721, 783, 401, 988, 748, 27, 783, 419, 1242, 760, 988, 777, 27, 748], 'shape': [15, 4519]}}), ('IC0007W0123', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:133415', 'shape': [817, 40]}, 'output': {'text': '新湖中宝继去年入股温州银行抢食金改红利后', 'tokenid': [184, 464, 347, 778, 2137, 125, 303, 346, 950, 161, 226, 659, 137, 2037, 1557, 497, 630, 457, 526, 114], 'shape': [20, 4519]}}), ('IC0007W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:6121728', 'shape': [817, 40]}, 'output': {'text': '今日发布了长达二分四十一秒的国剧版长全长预告', 'tokenid': [267, 521, 570, 985, 63, 176, 752, 760, 421, 366, 27, 66, 1613, 20, 468, 430, 483, 176, 11, 176, 32, 33], 'shape': [22, 4519]}}), ('IC0083W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:3833647', 'shape': [817, 40]}, 'output': {'text': '三名醉酒男子持刀捅伤一名出租车司机后骑摩托车逃跑', 'tokenid': [24, 444, 532, 871, 995, 393, 1485, 1102, 3525, 209, 66, 444, 56, 2125, 129, 929, 544, 114, 1267, 1999, 1083, 129, 2023, 1021], 'shape': [24, 4519]}}), ('IC0096W0228', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4463655', 'shape': [817, 40]}, 'output': {'text': '农村居民因病致贫因病返贫的状况得到很大缓解', 'tokenid': [1065, 496, 1263, 12, 377, 843, 1054, 3155, 377, 843, 1537, 3155, 20, 1776, 473, 471, 75, 244, 217, 1029, 1365], 'shape': [21, 4519]}}), ('IC0007W0456', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7187124', 'shape': [816, 40]}, 'output': {'text': '六分钟吃掉一点五斤凉菜大妈获一千六百六十六元奖励', 'tokenid': [283, 421, 1014, 853, 1337, 66, 74, 401, 2589, 1162, 1353, 217, 939, 1963, 66, 1242, 283, 988, 283, 27, 283, 1323, 1173, 1938], 'shape': [24, 4519]}}), ('IC0008W0165', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:645094', 'shape': [816, 40]}, 'output': {'text': '则是宝龙在二零一四动荡之年成功逆势稳企企稳的重要举措', 'tokenid': [2134, 60, 778, 1062, 51, 760, 783, 66, 366, 323, 1827, 339, 303, 309, 1100, 1176, 1551, 1737, 3202, 3202, 1737, 20, 573, 99, 453, 2592], 'shape': [26, 4519]}}), ('IC0003W0436', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:7942673', 'shape': [815, 40]}, 'output': {'text': '五岁男童遭患癌继母连砍三十多刀经抢救已脱险', 'tokenid': [401, 796, 995, 1068, 2600, 2977, 3647, 2137, 2078, 656, 2077, 24, 27, 147, 1102, 367, 2037, 1347, 1197, 97, 629], 'shape': [21, 4519]}}), ('IC0009W0467', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5596130', 'shape': [815, 40]}, 'output': {'text': '六旬翁致已婚女怀孕生子女子瞒丈夫十四年被发现', 'tokenid': [283, 2840, 3281, 1054, 1197, 838, 766, 1257, 3070, 25, 393, 766, 393, 1862, 3854, 1066, 27, 366, 303, 392, 570, 136], 'shape': [22, 4519]}}), ('IC0085W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:2998969', 'shape': [815, 40]}, 'output': {'text': '上半年中国农民工数量增百分之一年轻农民工缺口明显', 'tokenid': [54, 1170, 303, 347, 468, 1065, 12, 776, 1131, 150, 1836, 988, 421, 339, 66, 303, 443, 1065, 12, 776, 925, 944, 308, 862], 'shape': [24, 4519]}}), ('IC0097W0017', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:279411', 'shape': [815, 40]}, 'output': {'text': '西单商场天通苑购物中心天通西苑社区卫生服务站东', 'tokenid': [873, 159, 1510, 545, 78, 178, 1570, 111, 132, 347, 163, 78, 178, 873, 1570, 1572, 396, 823, 25, 41, 1018, 774, 359], 'shape': [23, 4519]}})]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('ID0041W0505', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7505017', 'shape': [815, 40]}, 'output': {'text': '江苏重复地名让人傻傻分不清三个周庄两个茅山', 'tokenid': [356, 374, 573, 277, 52, 444, 385, 72, 1327, 1327, 421, 42, 768, 24, 39, 227, 450, 505, 39, 3452, 523], 'shape': [21, 4519]}}), ('IC0086W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:1905953', 'shape': [814, 40]}, 'output': {'text': '上校阅兵后获批率直升机绕飞仙桃市向家乡致意', 'tokenid': [54, 516, 1360, 586, 114, 1963, 1943, 1593, 1041, 403, 544, 2440, 406, 1781, 28, 22, 543, 417, 495, 1054, 439], 'shape': [21, 4519]}}), ('IC0008W0306', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:3329467', 'shape': [813, 40]}, 'output': {'text': '并于一零一二年推出FACE加加人脸识别云平台', 'tokenid': [1073, 1398, 66, 783, 66, 760, 303, 64, 56, '<unk>', '<unk>', '<unk>', '<unk>', 285, 285, 72, 1010, 1450, 279, 487, 1223, 67], 'shape': [22, 4519]}}), ('IC0096W0124', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2318783', 'shape': [813, 40]}, 'output': {'text': '如近期苏州南京出现了溢价率超过百分之七十五的地块', 'tokenid': [236, 19, 196, 374, 226, 135, 360, 56, 136, 63, 1989, 399, 1593, 21, 420, 988, 421, 339, 748, 27, 401, 20, 52, 907], 'shape': [24, 4519]}}), ('IC0096W0493', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9927800', 'shape': [813, 40]}, 'output': {'text': '新教师和师范生代表在尊师重教纪念碑前宣誓静安公园', 'tokenid': [184, 325, 578, 232, 578, 1219, 25, 839, 1116, 51, 2242, 578, 573, 325, 1694, 1120, 945, 931, 663, 2772, 828, 720, 172, 1301], 'shape': [24, 4519]}}), ('IC0008W0457', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:5454681', 'shape': [812, 40]}, 'output': {'text': '水管里流出来的水冲洗着一辆悬挂广东号牌的丰田汽轿车', 'tokenid': [98, 897, 10, 149, 56, 154, 20, 98, 1576, 119, 341, 66, 1604, 2629, 1881, 365, 359, 860, 1252, 20, 1437, 484, 1306, 1969, 129], 'shape': [25, 4519]}}), ('IC0009W0133', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7928969', 'shape': [812, 40]}, 'output': {'text': '宝龙地产的收入较二零一三年增加约百分之三十三点一', 'tokenid': [778, 1062, 52, 1300, 20, 131, 346, 581, 760, 783, 66, 24, 303, 1836, 285, 686, 988, 421, 339, 24, 27, 24, 74, 66], 'shape': [24, 4519]}}), ('ID0044W0517', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.43.ark:7023639', 'shape': [812, 40]}, 'output': {'text': '联建光电LED显示应用产品实现销售收入三点七亿元', 'tokenid': [1008, 917, 452, 30, '<unk>', '<unk>', '<unk>', 862, 948, 677, 261, 1300, 490, 1207, 136, 977, 978, 131, 346, 24, 74, 748, 2721, 1323], 'shape': [24, 4519]}}), ('ID0041W0406', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5291230', 'shape': [811, 40]}, 'output': {'text': '杭州上海和广州电视商务服务指数分列前三位', 'tokenid': [580, 226, 54, 448, 232, 365, 226, 30, 93, 1510, 1018, 41, 1018, 134, 1131, 421, 429, 931, 24, 57], 'shape': [20, 4519]}}), ('IC0003W0478', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8898499', 'shape': [810, 40]}, 'output': {'text': '持刀和铁棍挟持传销头目后引起市民围观成功报警', 'tokenid': [1485, 1102, 232, 1261, 2277, 4490, 1485, 664, 977, 118, 723, 114, 2650, 404, 22, 12, 746, 807, 309, 1100, 326, 949], 'shape': [22, 4519]}}), ('IC0085W0330', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:673701', 'shape': [810, 40]}, 'output': {'text': '中信银行副行长郭党怀在银行业例行新闻发布会透露', 'tokenid': [347, 369, 659, 137, 2195, 137, 176, 1126, 2201, 1257, 51, 659, 137, 575, 454, 137, 184, 185, 570, 985, 174, 1554, 1491], 'shape': [23, 4519]}}), ('IC0080W0202', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:9823240', 'shape': [809, 40]}, 'output': {'text': '严格政府水利投融资企业的年度审计制度', 'tokenid': [1313, 400, 2203, 1146, 98, 526, 49, 2478, 1563, 3202, 575, 20, 303, 139, 2052, 1305, 278, 139], 'shape': [18, 4519]}}), ('IC0096W0485', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9763416', 'shape': [809, 40]}, 'output': {'text': '上海已能对十六种癌基因筛查几率较高将可干预', 'tokenid': [54, 448, 1197, 177, 935, 27, 283, 568, 3647, 228, 377, 4187, 80, 296, 1593, 581, 246, 1611, 47, 175, 32], 'shape': [21, 4519]}}), ('IC0080W0304', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:1884225', 'shape': [808, 40]}, 'output': {'text': '机器换人引发了是否会增加工厂裁员员工失业的疑虑', 'tokenid': [544, 1001, 124, 72, 2650, 570, 63, 60, 61, 174, 1836, 285, 776, 2169, 2869, 994, 994, 776, 716, 575, 20, 1258, 2545], 'shape': [23, 4519]}}), ('ID0043W0516', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:6961787', 'shape': [808, 40]}, 'output': {'text': '早在二零零五年的南宁全国青年竞走锦标赛上', 'tokenid': [418, 51, 760, 783, 783, 401, 303, 20, 135, 1069, 11, 468, 486, 303, 2725, 520, 1298, 55, 1180, 54], 'shape': [20, 4519]}}), ('IC0002W0240', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:3533192', 'shape': [807, 40]}, 'output': {'text': '现在需要对央企现有的经营产品链条进行重新思考', 'tokenid': [136, 51, 212, 99, 935, 809, 3202, 136, 107, 20, 367, 1942, 1300, 490, 1186, 1016, 1566, 137, 573, 184, 693, 1312], 'shape': [22, 4519]}}), ('IC0095W0459', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.24.ark:8490919', 'shape': [807, 40]}, 'output': {'text': '新民网记者在现场采访了该商场工程物业的王姓副总', 'tokenid': [184, 12, 651, 470, 281, 51, 136, 545, 2394, 2386, 63, 569, 1510, 545, 776, 1590, 132, 575, 20, 595, 2615, 2195, 878], 'shape': [23, 4519]}}), ('IC0006W0477', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.6.ark:6772771', 'shape': [806, 40]}, 'output': {'text': '她杀人的秘密直至上周背著尸体的画面曝光后才被曝光', 'tokenid': [886, 531, 72, 20, 981, 982, 1041, 668, 54, 227, 265, 2732, 1911, 451, 20, 1442, 216, 3674, 452, 114, 673, 392, 3674, 452], 'shape': [24, 4519]}}), ('IC0080W0218', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:272187', 'shape': [806, 40]}, 'output': {'text': '尤其是明确了见义勇为死亡人员抚恤补助政策', 'tokenid': [1373, 1144, 60, 308, 1325, 63, 193, 1399, 1475, 200, 502, 2024, 72, 994, 2233, 3817, 2432, 133, 2203, 2730], 'shape': [20, 4519]}}), ('IC0096W0455', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9072946', 'shape': [806, 40]}, 'output': {'text': '上海小龙虾盖浇饭店回应中毒事件对手恶意诽谤', 'tokenid': [54, 448, 7, 1062, 1473, 1161, 1858, 852, 106, 511, 677, 347, 2365, 363, 1105, 935, 241, 712, 439, 2444, '<unk>'], 'shape': [21, 4519]}}), ('IC0083W0433', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:3333608', 'shape': [805, 40]}, 'output': {'text': '三男子租厂房装修三层办公楼诈骗九十八家企业七百三十万', 'tokenid': [24, 995, 393, 2125, 2169, 1362, 942, 698, 24, 1307, 165, 172, 1294, 3373, 1498, 419, 27, 777, 417, 3202, 575, 748, 988, 24, 27, 1569], 'shape': [26, 4519]}}), ('IC0085W0460', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.15.ark:3322671', 'shape': [805, 40]}, 'output': {'text': '该办事处安监办副主任陈武被指酒后无故拉响消防警报', 'tokenid': [569, 165, 363, 665, 720, 2050, 165, 2195, 824, 611, 186, 726, 392, 134, 871, 114, 509, 362, 188, 153, 1012, 955, 949, 326], 'shape': [24, 4519]}}), ('IC0096W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:2475861', 'shape': [804, 40]}, 'output': {'text': '而剩余极少数纯商住宅部分价格被推升至畸高', 'tokenid': [1044, 1004, 169, 1089, 148, 1131, 609, 1510, 381, 1597, 1081, 421, 399, 400, 392, 64, 403, 668, '<unk>', 246], 'shape': [20, 4519]}}), ('IC0096W0169', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:3241668', 'shape': [804, 40]}, 'output': {'text': '国家统计局城市司高级统计师刘建伟进行了解读', 'tokenid': [468, 417, 1072, 1305, 1457, 480, 22, 929, 246, 1107, 1072, 1305, 578, 583, 917, 1230, 1566, 137, 63, 1365, 1199], 'shape': [21, 4519]}}), ('IC0096W0331', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:6615494', 'shape': [804, 40]}, 'output': {'text': '引导社会资本参与充电基础设施体系设运营', 'tokenid': [2650, 85, 1572, 174, 1563, 105, 1326, 501, 411, 30, 228, 2280, 683, 1178, 451, 1009, 683, 625, 1942], 'shape': [19, 4519]}}), ('IC0009W0130', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7856710', 'shape': [803, 40]}, 'output': {'text': '宝龙美术馆已与宝龙华韵宝龙拍卖书藏楼形成联动', 'tokenid': [778, 1062, 567, 1032, 762, 1197, 501, 778, 1062, 562, 357, 778, 1062, 1848, 397, 697, 804, 1294, 1145, 309, 1008, 323], 'shape': [22, 4519]}}), ('IC0003W0192', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:2969261', 'shape': [801, 40]}, 'output': {'text': '今年北京四川河南等省市率先公布了三公经费', 'tokenid': [267, 303, 224, 360, 366, 1334, 223, 135, 537, 514, 22, 1593, 564, 172, 985, 63, 24, 172, 367, 1005], 'shape': [20, 4519]}}), ('IC0007W0177', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:1312637', 'shape': [801, 40]}, 'output': {'text': '北京深圳由去年年中的二十个月降至年底的十二个月', 'tokenid': [224, 360, 695, 1193, 1092, 125, 303, 303, 347, 20, 760, 27, 39, 745, 591, 668, 303, 1195, 20, 27, 760, 39, 745], 'shape': [23, 4519]}}), ('IC0084W0170', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:7087869', 'shape': [801, 40]}, 'output': {'text': '其监测十大重点城市四十四个老推新项目中有一半出现价格上涨', 'tokenid': [1144, 2050, 2126, 27, 217, 573, 74, 480, 22, 366, 27, 366, 39, 282, 64, 184, 2447, 723, 347, 107, 66, 1170, 56, 136, 399, 400, 54, 3385], 'shape': [28, 4519]}}), ('IC0097W0301', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:4822197', 'shape': [801, 40]}, 'output': {'text': '外媒BUSINESSINSIDER认为', 'tokenid': [215, 2259, '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', '<unk>', 199, 200], 'shape': [19, 4519]}}), ('ID0041W0515', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7702267', 'shape': [801, 40]}, 'output': {'text': '而作为软硬件提供者的英特尔更是深谙此道', 'tokenid': [1044, 547, 200, 1577, 1726, 1105, 239, 2214, 281, 20, 493, 336, 529, 557, 60, 695, 3020, 1099, 44], 'shape': [19, 4519]}}), ('IC0003W0278', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:4673899', 'shape': [800, 40]}, 'output': {'text': 'CES的所有利润均再投资到CEA的产业服务中', 'tokenid': ['<unk>', '<unk>', '<unk>', 20, 434, 107, 526, 2166, 671, 386, 49, 1563, 75, '<unk>', '<unk>', '<unk>', 20, 1300, 575, 41, 1018, 347], 'shape': [22, 4519]}})]-----------------"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('IC0085W0214', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:7183574', 'shape': [879, 40]}, 'output': {'text': '某保险公司权益投资部总经理曾某借职务之便建老鼠仓', 'tokenid': [71, 628, 629, 172, 929, 1480, 2279, 49, 1563, 1081, 878, 367, 666, 1935, 71, 714, 2124, 1018, 339, 293, 917, 282, 1441, 2852], 'shape': [24, 4519]}}), ('IC0009W0360', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:3432159', 'shape': [878, 40]}, 'output': {'text': '首先由带有浓浓山海关特色的项目一古城定向开场', 'tokenid': [207, 564, 1092, 494, 107, 2702, 2702, 523, 448, 179, 336, 724, 20, 2447, 723, 66, 1128, 480, 87, 543, 95, 545], 'shape': [22, 4519]}}), ('IC0007W0129', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:297573', 'shape': [877, 40]}, 'output': {'text': '持股比例直逼宁波银行第一大股东新加坡华侨银行', 'tokenid': [1485, 950, 268, 454, 1041, 2357, 1069, 1735, 659, 137, 254, 66, 217, 950, 359, 184, 285, 1853, 562, 2989, 659, 137], 'shape': [22, 4519]}}), ('ID0020W0404', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.29.ark:4167261', 'shape': [877, 40]}, 'output': {'text': '浙江杀人嫌犯潜逃一七年持兄长身份证乘地铁被抓', 'tokenid': [2008, 356, 531, 72, 2129, 2270, 2474, 2023, 66, 748, 303, 1485, 589, 176, 701, 858, 859, 1771, 52, 1261, 392, 1955], 'shape': [22, 4519]}}), ('ID0043W0520', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:7043799', 'shape': [877, 40]}, 'output': {'text': '严禁骗取套取中央和省级财政城市棚户区改造专项资金', 'tokenid': [1313, 1386, 1498, 1011, 1866, 1011, 347, 809, 232, 514, 1107, 1715, 2203, 480, 22, 1171, 168, 396, 630, 1855, 204, 2447, 1563, 497], 'shape': [24, 4519]}}), ('IC0080W0461', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:5212413', 'shape': [875, 40]}, 'output': {'text': '三兄弟驾渔船长江翻船老大漂流十余公里获救', 'tokenid': [24, 589, 590, 1772, 3739, 1028, 176, 356, 1815, 1028, 282, 217, 412, 149, 27, 169, 172, 10, 1963, 1347], 'shape': [20, 4519]}}), ('IC0009W0126', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.8.ark:7748858', 'shape': [873, 40]}, 'output': {'text': '以及类酒店产品宝龙客栈及宝龙少海房车露营地', 'tokenid': [48, 435, 340, 871, 106, 1300, 490, 778, 1062, 1364, 3147, 435, 778, 1062, 148, 448, 1362, 129, 1491, 1942, 52], 'shape': [21, 4519]}}), ('IC0007W0454', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7133698', 'shape': [872, 40]}, 'output': {'text': '六内地乘客打伤七香港机场的勤续四人被判处监禁', 'tokenid': [283, 519, 52, 1771, 1364, 94, 209, 748, 1113, 1135, 544, 545, 20, 1754, 1856, 366, 72, 392, 1786, 665, 2050, 1386], 'shape': [22, 4519]}}), ('IC0080W0127', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:8159405', 'shape': [872, 40]}, 'output': {'text': '野三坡风景名胜区范围为为东北方向与北京市相邻', 'tokenid': [485, 24, 1853, 145, 266, 444, 1148, 396, 1219, 746, 200, 200, 359, 224, 292, 543, 501, 224, 360, 22, 831, 3331], 'shape': [22, 4519]}}), ('IC0096W0374', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:7466033', 'shape': [872, 40]}, 'output': {'text': '东道主福州队的徐婷婷摘得女子飞碟双向桂冠', 'tokenid': [359, 44, 824, 642, 226, 1060, 20, 856, 1870, 1870, 3220, 471, 766, 393, 406, 2400, 255, 543, 1761, 1462], 'shape': [20, 4519]}}), ('IC0007W0128', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:262380', 'shape': [871, 40]}, 'output': {'text': '雅戈尔持有宁波银行总股份达三点五一亿股', 'tokenid': [498, 2812, 529, 1485, 107, 1069, 1735, 659, 137, 878, 950, 858, 752, 24, 74, 401, 66, 2721, 950], 'shape': [19, 4519]}}), ('ID0043W0431', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.42.ark:5277342', 'shape': [868, 40]}, 'output': {'text': '而张继科则是拿下了生涯第六个全锦赛男单奖牌', 'tokenid': [1044, 474, 2137, 1523, 2134, 60, 933, 142, 63, 25, 2319, 254, 283, 39, 11, 1298, 1180, 995, 159, 1173, 1252], 'shape': [21, 4519]}}), ('IC0080W0432', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4527736', 'shape': [867, 40]}, 'output': {'text': '三亚景区售玳瑁三万元两只二月份曾被立案调查', 'tokenid': [24, 527, 266, 396, 978, '<unk>', '<unk>', 24, 1569, 1323, 505, 96, 760, 745, 858, 1935, 392, 1369, 566, 160, 80], 'shape': [21, 4519]}}), ('IC0007W0168', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:1098660', 'shape': [866, 40]}, 'output': {'text': '信贷和公积金政策的逐渐宽松对市场而言是较大的利好', 'tokenid': [369, 2975, 232, 172, 1621, 497, 2203, 2730, 20, 3382, 1830, 1790, 354, 935, 22, 545, 1044, 4, 60, 581, 217, 20, 526, 120], 'shape': [24, 4519]}}), ('IC0080W0411', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4096163', 'shape': [865, 40]}, 'output': {'text': '中国新闻网七月二十三日报道据台湾东森新闻消息', 'tokenid': [347, 468, 184, 185, 651, 748, 745, 760, 27, 24, 521, 326, 44, 1775, 67, 1917, 359, 1505, 184, 185, 1012, 997], 'shape': [22, 4519]}}), ('IC0007W0481', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:7810269', 'shape': [863, 40]}, 'output': {'text': '六岁女童村中被扎身亡货车扎头部后仍继续行驶', 'tokenid': [283, 796, 766, 1068, 496, 347, 392, 2387, 701, 2024, 996, 129, 2387, 118, 1081, 114, 3031, 2137, 1856, 137, 138], 'shape': [21, 4519]}}), ('IC0080W0385', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:3581018', 'shape': [863, 40]}, 'output': {'text': '难遇天才竟急速沉沦张继科在负面新闻中渐行渐远', 'tokenid': [937, 699, 78, 673, 2155, 455, 62, 1797, 2855, 474, 2137, 1523, 51, 731, 216, 184, 185, 347, 1830, 137, 1830, 306], 'shape': [22, 4519]}}), ('IC0003W0143', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:1890364', 'shape': [858, 40]}, 'output': {'text': '怎么完成传统房企与互联网融合落地都是其研究的大事', 'tokenid': [101, 109, 37, 309, 664, 1072, 1362, 3202, 501, 2806, 1008, 651, 2478, 45, 717, 52, 173, 60, 1144, 1918, 1919, 20, 217, 363], 'shape': [24, 4519]}}), ('IC0080W0443', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4757299', 'shape': [858, 40]}, 'output': {'text': '三亚餐厅扇贝十五元一份变十五元一个续同意退款', 'tokenid': [24, 527, 884, 1542, 2789, 863, 27, 401, 1323, 66, 858, 730, 27, 401, 1323, 66, 39, 1856, 426, 439, 627, 104], 'shape': [22, 4519]}}), ('ID0047W0379', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.46.ark:4383869', 'shape': [858, 40]}, 'output': {'text': '索玛基金会理事长黄红斌被当地森林公安带走', 'tokenid': [16, 2017, 228, 497, 174, 666, 363, 176, 489, 457, 816, 392, 472, 52, 1505, 593, 172, 720, 494, 520], 'shape': [20, 4519]}}), ('IC0080W0449', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.10.ark:4918257', 'shape': [857, 40]}, 'output': {'text': '华商报咸阳讯记者张林三小伙伴相约出去玩水', 'tokenid': [562, 1510, 326, 3364, 600, 1992, 470, 281, 474, 593, 24, 7, 684, 409, 831, 686, 56, 125, 221, 98], 'shape': [20, 4519]}}), ('IC0087W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.16.ark:5112895', 'shape': [855, 40]}, 'output': {'text': '国家统计局今日发布七十大中城市房价变动情况', 'tokenid': [468, 417, 1072, 1305, 1457, 267, 521, 570, 985, 748, 27, 217, 347, 480, 22, 1362, 399, 730, 323, 164, 473], 'shape': [21, 4519]}}), ('IC0097W0163', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.26.ark:2539429', 'shape': [855, 40]}, 'output': {'text': '二零一五年上半年又经历了三次降息两次降准调整力度堪比二零零八年', 'tokenid': [760, 783, 66, 401, 303, 54, 1170, 303, 324, 367, 1285, 63, 24, 1204, 591, 997, 505, 1204, 591, 35, 160, 658, 270, 139, 3053, 268, 760, 783, 783, 777, 303], 'shape': [31, 4519]}}), ('IC0096W0479', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:9619418', 'shape': [854, 40]}, 'output': {'text': '上海崇明通报村官腐案虚开发票套取公款等', 'tokenid': [54, 448, 1960, 308, 178, 326, 496, 1234, 1595, 566, 2475, 95, 570, 626, 1866, 1011, 172, 104, 537], 'shape': [19, 4519]}}), ('ID0041W0399', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:5125879', 'shape': [854, 40]}, 'output': {'text': '现在微传科技也积极在海外对自己的技术进行保护', 'tokenid': [136, 51, 719, 664, 1523, 1555, 229, 1621, 1089, 51, 448, 215, 935, 275, 276, 20, 1555, 1032, 1566, 137, 628, 1670], 'shape': [22, 4519]}}), ('IC0003W0446', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8134883', 'shape': [853, 40]}, 'output': {'text': '孩子的父亲在手术室外痛哭昨日下午二时三十分左右', 'tokenid': [866, 393, 20, 614, 615, 51, 241, 1032, 782, 215, 634, 1447, 605, 521, 142, 966, 760, 83, 24, 27, 421, 1824, 2063], 'shape': [23, 4519]}}), ('IC0003W0480', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:8949725', 'shape': [853, 40]}, 'output': {'text': '五男孩去玩水一人溺亡小伙伴将其物品扔掉隐瞒', 'tokenid': [401, 995, 866, 125, 221, 98, 66, 72, 3390, 2024, 7, 684, 409, 1611, 1144, 132, 490, 2880, 1337, 2196, 1862], 'shape': [21, 4519]}}), ('IC0002W0145', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.2.ark:1716617', 'shape': [852, 40]}, 'output': {'text': '而另外一剂楼市强心剂则当属央行的不对称降息', 'tokenid': [1044, 2092, 215, 66, 3040, 1294, 22, 844, 163, 3040, 2134, 472, 620, 809, 137, 20, 42, 935, 938, 591, 997], 'shape': [21, 4519]}}), ('IC0003W0482', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.3.ark:9006551', 'shape': [852, 40]}, 'output': {'text': '周至警方初步排除他杀可能一当事孩子称', 'tokenid': [227, 668, 949, 292, 1795, 345, 144, 587, 194, 531, 47, 177, 66, 472, 363, 866, 393, 938], 'shape': [18, 4519]}}), ('IC0007W0253', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.7.ark:2915505', 'shape': [851, 40]}, 'output': {'text': '透明窗式显示屏交互式虚拟镜子以及拍照手机等', 'tokenid': [1554, 308, 770, 821, 862, 948, 1612, 81, 2806, 821, 2475, 3479, 1385, 393, 48, 435, 1848, 1259, 241, 544, 537], 'shape': [21, 4519]}}), ('IC0084W0154', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.13.ark:6790101', 'shape': [850, 40]}, 'output': {'text': '五十四个城市住宅签约总量达十九万四千九百零九套', 'tokenid': [401, 27, 366, 39, 480, 22, 381, 1597, 1959, 686, 878, 150, 752, 27, 419, 1569, 366, 1242, 419, 988, 783, 419, 1866], 'shape': [23, 4519]}}), ('IC0009W0452', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.9.ark:5256635', 'shape': [849, 40]}, 'output': {'text': '六旬大妈伪造签证多次赴美涉偷越国境被判管制一年', 'tokenid': [283, 2840, 217, 939, 1515, 1855, 1959, 859, 147, 1204, 3497, 567, 1743, 1344, 446, 468, 2332, 392, 1786, 897, 278, 66, 303], 'shape': [23, 4519]}})]-----------------"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "('IC0096W0217', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.25.ark:4231612', 'shape': [830, 40]}, 'output': {'text': '问这次审计中的新农合资金是由卫生部管理的', 'tokenid': [77, 9, 1204, 2052, 1305, 347, 20, 184, 1065, 45, 1563, 497, 60, 1092, 823, 25, 1081, 897, 666, 20], 'shape': [20, 4519]}})-----------------\n",
      "('ID0041W0505', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.40.ark:7505017', 'shape': [815, 40]}, 'output': {'text': '江苏重复地名让人傻傻分不清三个周庄两个茅山', 'tokenid': [356, 374, 573, 277, 52, 444, 385, 72, 1327, 1327, 421, 42, 768, 24, 39, 227, 450, 505, 39, 3452, 523], 'shape': [21, 4519]}})\n",
      "\n",
      "('IC0085W0214', {'input': {'feat': '/home1/meichaoyang/dataset/data_aishell2/feats/test/_fbank/raw_fbank_test.14.ark:7183574', 'shape': [879, 40]}, 'output': {'text': '某保险公司权益投资部总经理曾某借职务之便建老鼠仓', 'tokenid': [71, 628, 629, 172, 929, 1480, 2279, 49, 1563, 1081, 878, 367, 666, 1935, 71, 714, 2124, 1018, 339, 293, 917, 282, 1441, 2852], 'shape': [24, 4519]}})\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "unsupported operand type(s) for +: 'int' and 'str'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-6f9c62e1016d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mte_loader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\" : \"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'int' and 'str'"
     ]
    }
   ],
   "source": [
    "for i, (data) in enumerate(te_loader):\n",
    "    print(i+\" : \"+data)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
